From 79ea757a8a1684e4f3148fad15466c5c433d9467 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg
Date: Fri, 26 Dec 2008 22:41:18 +0100
Subject: [PATCH]

--- yaml ---
r: 125425
b: refs/heads/master
c: cfb2a494bb7dca9cf8d1632fbed14b34db051980
h: refs/heads/master
i:
  125423: 16ffdefdd71883f86daf5ab2d85ddf28ed699fdb
v: v3
---
 [refs] | 2 +-
 trunk/Documentation/cpu-hotplug.txt | 17 +-
 trunk/MAINTAINERS | 2 +
 trunk/arch/alpha/include/asm/smp.h | 1 +
 trunk/arch/alpha/kernel/irq.c | 2 +-
 trunk/arch/alpha/kernel/process.c | 2 -
 trunk/arch/alpha/kernel/smp.c | 7 +-
 trunk/arch/alpha/kernel/sys_dp264.c | 8 +-
 trunk/arch/alpha/kernel/sys_titan.c | 4 +-
 trunk/arch/arm/common/gic.c | 4 +-
 trunk/arch/arm/kernel/irq.c | 2 +-
 trunk/arch/arm/kernel/smp.c | 10 +
 trunk/arch/arm/mach-at91/at91rm9200_time.c | 3 +-
 trunk/arch/arm/mach-at91/at91sam926x_time.c | 2 +-
 trunk/arch/arm/mach-davinci/time.c | 2 +-
 trunk/arch/arm/mach-imx/time.c | 2 +-
 trunk/arch/arm/mach-ixp4xx/common.c | 2 +-
 trunk/arch/arm/mach-msm/timer.c | 2 +-
 trunk/arch/arm/mach-ns9xxx/time-ns9360.c | 2 +-
 trunk/arch/arm/mach-omap1/time.c | 2 +-
 trunk/arch/arm/mach-omap1/timer32k.c | 2 +-
 trunk/arch/arm/mach-omap2/timer-gp.c | 2 +-
 trunk/arch/arm/mach-pxa/time.c | 2 +-
 trunk/arch/arm/mach-realview/core.c | 2 +-
 trunk/arch/arm/mach-realview/localtimer.c | 4 +-
 trunk/arch/arm/mach-sa1100/time.c | 2 +-
 trunk/arch/arm/mach-versatile/core.c | 2 +-
 trunk/arch/arm/oprofile/op_model_mpcore.c | 4 +-
 trunk/arch/arm/plat-mxc/time.c | 2 +-
 trunk/arch/arm/plat-orion/time.c | 2 +-
 trunk/arch/avr32/kernel/time.c | 2 +-
 trunk/arch/blackfin/kernel/time-ts.c | 2 +-
 trunk/arch/cris/arch-v32/kernel/irq.c | 4 +-
 trunk/arch/cris/arch-v32/kernel/smp.c | 4 +
 trunk/arch/cris/include/asm/smp.h | 1 +
 trunk/arch/ia64/hp/sim/hpsim_irq.c | 2 +-
 trunk/arch/ia64/include/asm/kvm.h | 6 +-
 trunk/arch/ia64/include/asm/kvm_host.h | 196 +--
 trunk/arch/ia64/include/asm/smp.h | 1 +
 trunk/arch/ia64/include/asm/topology.h | 2 +
 trunk/arch/ia64/kernel/iosapic.c | 12 +-
 trunk/arch/ia64/kernel/irq.c | 9 +-
 trunk/arch/ia64/kernel/msi_ia64.c | 12 +-
 trunk/arch/ia64/kernel/smpboot.c | 10 +-
 trunk/arch/ia64/kernel/topology.c | 2 +-
 trunk/arch/ia64/kvm/Makefile | 2 +-
 trunk/arch/ia64/kvm/asm-offsets.c | 11 +-
 trunk/arch/ia64/kvm/kvm-ia64.c | 107 +-
 trunk/arch/ia64/kvm/kvm_lib.c | 15 -
 trunk/arch/ia64/kvm/kvm_minstate.h | 4 +-
 trunk/arch/ia64/kvm/misc.h | 3 +-
 trunk/arch/ia64/kvm/mmio.c | 38 +-
 trunk/arch/ia64/kvm/process.c | 29 +-
 trunk/arch/ia64/kvm/vcpu.c | 76 +-
 trunk/arch/ia64/kvm/vcpu.h | 5 +-
 trunk/arch/ia64/kvm/vmm.c | 29 -
 trunk/arch/ia64/kvm/vmm_ivt.S | 1469 +++++++++--------
 trunk/arch/ia64/kvm/vtlb.c | 4 +-
 trunk/arch/ia64/sn/kernel/irq.c | 6 +-
 trunk/arch/ia64/sn/kernel/msi_sn.c | 7 +-
 trunk/arch/m32r/Kconfig | 1 -
 trunk/arch/m32r/kernel/smpboot.c | 6 +
 trunk/arch/m68k/Kconfig | 1 -
 trunk/arch/m68knommu/platform/coldfire/pit.c | 2 +-
 trunk/arch/mips/include/asm/irq.h | 3 +-
 .../mips/include/asm/mach-ip27/topology.h | 1 +
 trunk/arch/mips/include/asm/smp.h | 3 +
 trunk/arch/mips/jazz/irq.c | 2 +-
 trunk/arch/mips/kernel/cevt-bcm1480.c | 4 +-
 trunk/arch/mips/kernel/cevt-ds1287.c | 2 +-
 trunk/arch/mips/kernel/cevt-gt641xx.c | 2 +-
 trunk/arch/mips/kernel/cevt-r4k.c | 2 +-
 trunk/arch/mips/kernel/cevt-sb1250.c | 4 +-
 trunk/arch/mips/kernel/cevt-smtc.c | 2 +-
 trunk/arch/mips/kernel/cevt-txx9.c | 2 +-
 trunk/arch/mips/kernel/i8253.c | 2 +-
 trunk/arch/mips/kernel/irq-gic.c | 6 +-
 trunk/arch/mips/kernel/smp-cmp.c | 6 +-
 trunk/arch/mips/kernel/smp-mt.c | 2 +-
 trunk/arch/mips/kernel/smp.c | 7 +-
 trunk/arch/mips/kernel/smtc.c | 6 +-
 trunk/arch/mips/mti-malta/malta-smtc.c | 6 +-
 trunk/arch/mips/nxp/pnx8550/common/time.c | 1 -
 trunk/arch/mips/pmc-sierra/yosemite/smp.c | 6 +-
 trunk/arch/mips/sgi-ip27/ip27-smp.c | 2 +-
 trunk/arch/mips/sgi-ip27/ip27-timer.c | 2 +-
 trunk/arch/mips/sibyte/bcm1480/irq.c | 8 +-
 trunk/arch/mips/sibyte/bcm1480/smp.c | 8 +-
 trunk/arch/mips/sibyte/sb1250/irq.c | 8 +-
 trunk/arch/mips/sibyte/sb1250/smp.c | 8 +-
 trunk/arch/mips/sni/time.c | 2 +-
 trunk/arch/parisc/Kconfig | 1 -
 trunk/arch/parisc/kernel/irq.c | 6 +-
 trunk/arch/parisc/kernel/smp.c | 15 +
 trunk/arch/powerpc/include/asm/disassemble.h | 80 -
 trunk/arch/powerpc/include/asm/kvm_44x.h | 61 -
 trunk/arch/powerpc/include/asm/kvm_host.h | 116 +-
 trunk/arch/powerpc/include/asm/kvm_ppc.h | 83 +-
 trunk/arch/powerpc/include/asm/mmu-44x.h | 1 -
 trunk/arch/powerpc/include/asm/topology.h | 1 +
 trunk/arch/powerpc/kernel/asm-offsets.c | 21 +-
 trunk/arch/powerpc/kernel/irq.c | 2 +-
 trunk/arch/powerpc/kernel/smp.c | 4 +
 trunk/arch/powerpc/kernel/time.c | 2 +-
 trunk/arch/powerpc/kvm/44x.c | 228 ---
 trunk/arch/powerpc/kvm/44x_emulate.c | 371 -----
 trunk/arch/powerpc/kvm/44x_tlb.c | 463 ++----
 trunk/arch/powerpc/kvm/44x_tlb.h | 26 +-
 trunk/arch/powerpc/kvm/Kconfig | 28 +-
 trunk/arch/powerpc/kvm/Makefile | 12 +-
 trunk/arch/powerpc/kvm/booke.h | 60 -
 .../powerpc/kvm/{booke.c => booke_guest.c} | 418 ++---
 trunk/arch/powerpc/kvm/booke_host.c | 83 +
 trunk/arch/powerpc/kvm/booke_interrupts.S | 72 +-
 trunk/arch/powerpc/kvm/emulate.c | 447 ++++-
 trunk/arch/powerpc/kvm/powerpc.c | 130 +-
 trunk/arch/powerpc/kvm/timing.c | 239 ---
 trunk/arch/powerpc/kvm/timing.h | 102 --
 trunk/arch/powerpc/platforms/pseries/xics.c | 4 +-
 trunk/arch/powerpc/sysdev/mpic.c | 4 +-
 trunk/arch/powerpc/sysdev/mpic.h | 2 +-
 trunk/arch/s390/Kconfig | 1 -
 trunk/arch/s390/kernel/smp.c | 6 +
 trunk/arch/s390/kernel/time.c | 2 +-
 trunk/arch/s390/kvm/kvm-s390.c | 41 +-
 trunk/arch/sh/include/asm/smp.h | 2 +-
 trunk/arch/sh/include/asm/topology.h | 1 +
 trunk/arch/sh/kernel/smp.c | 10 +-
 trunk/arch/sh/kernel/timers/timer-broadcast.c | 2 +-
 trunk/arch/sh/kernel/timers/timer-tmu.c | 2 +-
 trunk/arch/sparc/include/asm/smp_32.h | 2 +
 trunk/arch/sparc/kernel/irq_64.c | 11 +-
 trunk/arch/sparc/kernel/of_device_64.c | 2 +-
 trunk/arch/sparc/kernel/pci_msi.c | 2 +-
 trunk/arch/sparc/kernel/smp_32.c | 6 +-
 trunk/arch/sparc/kernel/smp_64.c | 4 +
 trunk/arch/sparc/kernel/sparc_ksyms_32.c | 4 +
 trunk/arch/sparc/kernel/time_64.c | 2 +-
 trunk/arch/um/kernel/smp.c | 7 +
 trunk/arch/um/kernel/time.c | 2 +-
 trunk/arch/x86/Kconfig | 13 +-
 trunk/arch/x86/ia32/ia32_signal.c | 3 +-
 trunk/arch/x86/ia32/ipc32.c | 1 -
 trunk/arch/x86/ia32/sys_ia32.c | 2 +-
 trunk/arch/x86/include/asm/apic.h | 3 +-
 trunk/arch/x86/include/asm/bigsmp/apic.h | 32 +-
 trunk/arch/x86/include/asm/bigsmp/ipi.h | 13 +-
 trunk/arch/x86/include/asm/desc.h | 10 +-
 trunk/arch/x86/include/asm/efi.h | 1 -
 trunk/arch/x86/include/asm/es7000/apic.h | 82 +-
 trunk/arch/x86/include/asm/es7000/ipi.h | 12 +-
 trunk/arch/x86/include/asm/genapic_32.h | 13 +-
 trunk/arch/x86/include/asm/genapic_64.h | 14 +-
 trunk/arch/x86/include/asm/ipi.h | 23 +-
 trunk/arch/x86/include/asm/irq.h | 3 +-
 trunk/arch/x86/include/asm/kvm_host.h | 45 +-
 trunk/arch/x86/include/asm/kvm_x86_emulate.h | 11 +-
 .../x86/include/asm/mach-default/mach_apic.h | 28 +-
 .../x86/include/asm/mach-default/mach_ipi.h | 18 +-
 .../x86/include/asm/mach-generic/mach_apic.h | 1 -
 trunk/arch/x86/include/asm/mpspec.h | 2 +-
 trunk/arch/x86/include/asm/mtrr.h | 25 -
 trunk/arch/x86/include/asm/numaq/apic.h | 12 +-
 trunk/arch/x86/include/asm/numaq/ipi.h | 13 +-
 trunk/arch/x86/include/asm/smp.h | 6 +-
 trunk/arch/x86/include/asm/summit/apic.h | 55 +-
 trunk/arch/x86/include/asm/summit/ipi.h | 9 +-
 trunk/arch/x86/include/asm/sys_ia32.h | 101 --
 trunk/arch/x86/include/asm/topology.h | 2 -
 trunk/arch/x86/include/asm/uv/uv_bau.h | 46 +-
 trunk/arch/x86/include/asm/virtext.h | 132 --
 trunk/arch/x86/kernel/amd_iommu.c | 2 +-
 trunk/arch/x86/kernel/amd_iommu_init.c | 4 +-
 trunk/arch/x86/kernel/apic.c | 42 +-
 trunk/arch/x86/kernel/bios_uv.c | 2 +-
 trunk/arch/x86/kernel/cpu/intel_cacheinfo.c | 45 +-
 trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 108 +-
 trunk/arch/x86/kernel/cpu/mtrr/generic.c | 12 +-
 trunk/arch/x86/kernel/cpu/mtrr/main.c | 10 +-
 trunk/arch/x86/kernel/cpu/mtrr/mtrr.h | 18 +-
 trunk/arch/x86/kernel/cpuid.c | 6 +-
 trunk/arch/x86/kernel/crash.c | 18 -
 trunk/arch/x86/kernel/early_printk.c | 2 +-
 trunk/arch/x86/kernel/genapic_flat_64.c | 107 +-
 trunk/arch/x86/kernel/genx2apic_cluster.c | 81 +-
 trunk/arch/x86/kernel/genx2apic_phys.c | 78 +-
 trunk/arch/x86/kernel/genx2apic_uv_x.c | 61 +-
 trunk/arch/x86/kernel/head64.c | 2 +-
 trunk/arch/x86/kernel/hpet.c | 8 +-
 trunk/arch/x86/kernel/i8253.c | 2 +-
 trunk/arch/x86/kernel/io_apic.c | 372 +++--
 trunk/arch/x86/kernel/ipi.c | 28 +-
 trunk/arch/x86/kernel/irq.c | 3 -
 trunk/arch/x86/kernel/irq_32.c | 13 +-
 trunk/arch/x86/kernel/irq_64.c | 15 +-
 trunk/arch/x86/kernel/irqinit_32.c | 16 +-
 trunk/arch/x86/kernel/irqinit_64.c | 13 -
 trunk/arch/x86/kernel/kvmclock.c | 10 +-
 trunk/arch/x86/kernel/ldt.c | 4 +-
 trunk/arch/x86/kernel/mfgpt_32.c | 2 +-
 trunk/arch/x86/kernel/mmconf-fam10h_64.c | 3 +-
 trunk/arch/x86/kernel/mpparse.c | 10 +-
 trunk/arch/x86/kernel/nmi.c | 3 +-
 trunk/arch/x86/kernel/pci-gart_64.c | 2 +-
 trunk/arch/x86/kernel/reboot.c | 69 +-
 trunk/arch/x86/kernel/setup_percpu.c | 19 +-
 trunk/arch/x86/kernel/smp.c | 8 +-
 trunk/arch/x86/kernel/smpboot.c | 33 +-
 trunk/arch/x86/kernel/tlb_32.c | 2 +-
 trunk/arch/x86/kernel/tlb_64.c | 2 +-
 trunk/arch/x86/kernel/tlb_uv.c | 9 +
 trunk/arch/x86/kernel/traps.c | 45 +-
 trunk/arch/x86/kernel/vmiclock_32.c | 2 +-
 trunk/arch/x86/kernel/xsave.c | 2 +-
 trunk/arch/x86/kvm/i8254.c | 19 -
 trunk/arch/x86/kvm/i8259.c | 52 +-
 trunk/arch/x86/kvm/irq.h | 6 -
 trunk/arch/x86/kvm/kvm_svm.h | 2 +-
 trunk/arch/x86/kvm/lapic.c | 58 +-
 trunk/arch/x86/kvm/mmu.c | 444 +----
 trunk/arch/x86/kvm/paging_tmpl.h | 44 +-
 trunk/arch/x86/kvm/svm.c | 48 +-
 trunk/arch/x86/{include/asm => kvm}/svm.h | 0
 trunk/arch/x86/kvm/vmx.c | 350 ++--
 trunk/arch/x86/{include/asm => kvm}/vmx.h | 27 +-
 trunk/arch/x86/kvm/x86.c | 117 +-
 trunk/arch/x86/kvm/x86_emulate.c | 297 ++--
 trunk/arch/x86/lguest/boot.c | 2 +-
 trunk/arch/x86/mach-default/setup.c | 15 +-
 trunk/arch/x86/mach-generic/bigsmp.c | 5 +-
 trunk/arch/x86/mach-generic/es7000.c | 5 +-
 trunk/arch/x86/mach-generic/numaq.c | 5 +-
 trunk/arch/x86/mach-generic/summit.c | 5 +-
 trunk/arch/x86/mach-voyager/voyager_smp.c | 9 +-
 trunk/arch/x86/mm/numa_64.c | 4 +-
 trunk/arch/x86/mm/srat_64.c | 2 +-
 trunk/arch/x86/pci/acpi.c | 2 +-
 trunk/arch/x86/pci/amd_bus.c | 2 +-
 trunk/arch/x86/pci/common.c | 3 +-
 trunk/arch/x86/pci/direct.c | 2 +-
 trunk/arch/x86/pci/early.c | 2 +-
 trunk/arch/x86/pci/fixup.c | 3 +-
 trunk/arch/x86/pci/i386.c | 2 +-
 trunk/arch/x86/pci/init.c | 2 +-
 trunk/arch/x86/pci/irq.c | 3 +-
 trunk/arch/x86/pci/legacy.c | 2 +-
 trunk/arch/x86/pci/mmconfig-shared.c | 3 +-
 trunk/arch/x86/pci/mmconfig_32.c | 2 +-
 trunk/arch/x86/pci/mmconfig_64.c | 3 +-
 trunk/arch/x86/pci/numaq_32.c | 2 +-
 trunk/arch/x86/pci/olpc.c | 2 +-
 trunk/arch/x86/pci/pcbios.c | 5 +-
 .../x86/{include/asm/pci_x86.h => pci/pci.h} | 17 +-
 trunk/arch/x86/pci/visws.c | 3 +-
 trunk/arch/x86/xen/mmu.c | 20 +-
 trunk/arch/x86/xen/smp.c | 27 +-
 trunk/arch/x86/xen/suspend.c | 3 +-
 trunk/arch/x86/xen/time.c | 2 +-
 trunk/arch/x86/xen/xen-ops.h | 2 +-
 trunk/drivers/base/cpu.c | 2 +-
 trunk/drivers/base/node.c | 4 +-
 trunk/drivers/base/topology.c | 4 +-
 trunk/drivers/clocksource/tcb_clksrc.c | 2 +-
 trunk/drivers/lguest/interrupts_and_traps.c | 13 +-
 trunk/drivers/parisc/iosapic.c | 7 +-
 trunk/drivers/pci/hotplug/cpqphp_core.c | 2 +-
 trunk/drivers/pci/hotplug/cpqphp_pci.c | 2 +-
 trunk/drivers/pci/hotplug/ibmphp_core.c | 2 +-
 trunk/drivers/pci/pci-sysfs.c | 4 +-
 trunk/drivers/pci/probe.c | 4 +-
 trunk/drivers/xen/events.c | 6 +-
 trunk/fs/anon_inodes.c | 7 +-
 trunk/include/asm-generic/topology.h | 14 +-
 trunk/include/asm-m32r/smp.h | 2 +
 trunk/include/linux/clockchips.h | 4 +-
 trunk/include/linux/compiler-gcc3.h | 4 -
 trunk/include/linux/cpumask.h | 98 +-
 trunk/include/linux/interrupt.h | 4 +-
 trunk/include/linux/irq.h | 3 +-
 trunk/include/linux/kvm.h | 18 -
 trunk/include/linux/kvm_host.h | 12 +-
 trunk/include/linux/sched.h | 92 +-
 trunk/include/linux/topology.h | 6 +-
 trunk/init/Kconfig | 9 -
 trunk/kernel/cpu.c | 11 +-
 trunk/kernel/cpuset.c | 4 +-
 trunk/kernel/irq/chip.c | 2 +-
 trunk/kernel/irq/manage.c | 22 +-
 trunk/kernel/irq/migration.c | 14 +-
 trunk/kernel/irq/proc.c | 29 +-
 trunk/kernel/profile.c | 4 +-
 trunk/kernel/rcuclassic.c | 2 +-
 trunk/kernel/sched.c | 970 +++++------
 trunk/kernel/sched_cpupri.c | 39 +-
 trunk/kernel/sched_cpupri.h | 5 +-
 trunk/kernel/sched_fair.c | 32 +-
 trunk/kernel/sched_rt.c | 73 +-
 trunk/kernel/sched_stats.h | 3 +-
 trunk/kernel/taskstats.c | 2 +-
 trunk/kernel/time/clockevents.c | 2 -
 trunk/kernel/time/tick-broadcast.c | 2 +-
 trunk/kernel/time/tick-common.c | 12 +-
 trunk/kernel/time/tick-sched.c | 10 +-
 trunk/kernel/trace/trace.c | 4 +-
 trunk/lib/Kconfig | 7 -
 trunk/mm/slub.c | 2 +-
 trunk/virt/kvm/ioapic.c | 8 +-
 trunk/virt/kvm/ioapic.h | 2 -
 trunk/virt/kvm/irq_comm.c | 19 +-
 trunk/virt/kvm/kvm_main.c | 420 ++---
 trunk/virt/kvm/kvm_trace.c | 1 -
 311 files changed, 4159 insertions(+), 7001 deletions(-)
 delete mode 100644 trunk/arch/ia64/kvm/kvm_lib.c
 delete mode 100644 trunk/arch/powerpc/include/asm/disassemble.h
 delete mode 100644 trunk/arch/powerpc/include/asm/kvm_44x.h
 delete mode 100644 trunk/arch/powerpc/kvm/44x.c
 delete mode 100644 trunk/arch/powerpc/kvm/44x_emulate.c
 delete mode 100644 trunk/arch/powerpc/kvm/booke.h
 rename trunk/arch/powerpc/kvm/{booke.c => booke_guest.c} (56%)
 create mode 100644 trunk/arch/powerpc/kvm/booke_host.c
 delete mode 100644 trunk/arch/powerpc/kvm/timing.c
 delete mode 100644 trunk/arch/powerpc/kvm/timing.h
 delete mode 100644 trunk/arch/x86/include/asm/sys_ia32.h
 delete mode 100644 trunk/arch/x86/include/asm/virtext.h
 rename trunk/arch/x86/{include/asm => kvm}/svm.h (100%)
 rename trunk/arch/x86/{include/asm => kvm}/vmx.h (93%)
 rename trunk/arch/x86/{include/asm/pci_x86.h => pci/pci.h} (88%)

diff --git a/[refs] b/[refs]
index a587c0e6a238..092e9bade019 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: a8e782348d9f0dc64f6adb81f5f6959921949f13
+refs/heads/master: cfb2a494bb7dca9cf8d1632fbed14b34db051980
diff --git a/trunk/Documentation/cpu-hotplug.txt b/trunk/Documentation/cpu-hotplug.txt
index 9d620c153b04..94bbc27ddd4f 100644
--- a/trunk/Documentation/cpu-hotplug.txt
+++ b/trunk/Documentation/cpu-hotplug.txt
@@ -50,17 +50,16 @@
 additional_cpus=n (*)	Use this to limit hotpluggable cpus. This option sets
 			cpu_possible_map = cpu_present_map + additional_cpus
 
 (*) Option valid only for following architectures
-- ia64
+- x86_64, ia64
 
-ia64 uses the number of disabled local apics in ACPI tables MADT to
-determine the number of potentially hot-pluggable cpus. The implementation
-should only rely on this to count the # of cpus, but *MUST* not rely
-on the apicid values in those tables for disabled apics. In the event
-BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
-use this parameter "additional_cpus=x" to represent those cpus in the
-cpu_possible_map.
+ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
+to determine the number of potentially hot-pluggable cpus. The implementation
+should only rely on this to count the # of cpus, but *MUST* not rely on the
+apicid values in those tables for disabled apics. In the event BIOS doesn't
+mark such hot-pluggable cpus as disabled entries, one could use this
+parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map.
 
-possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus.
+possible_cpus=n [s390 only] use this to set hotpluggable cpus.
 			This option sets possible_cpus bits in cpu_possible_map.
 			Thus keeping the numbers of bits set constant even if
 			the machine gets rebooted.
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index befacf07729f..88f43e09aeb3 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -2542,6 +2542,8 @@ W: http://kvm.qumranet.com
 S:	Supported
 
 KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
+P:	Anthony Xu
+M:	anthony.xu@intel.com
 P:	Xiantao Zhang
 M:	xiantao.zhang@intel.com
 L:	kvm-ia64@vger.kernel.org
diff --git a/trunk/arch/alpha/include/asm/smp.h b/trunk/arch/alpha/include/asm/smp.h
index 547e90951cec..544c69af8168 100644
--- a/trunk/arch/alpha/include/asm/smp.h
+++ b/trunk/arch/alpha/include/asm/smp.h
@@ -45,6 +45,7 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
 extern int smp_num_cpus;
+#define cpu_possible_map	cpu_present_map
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi(cpumask_t mask);
diff --git a/trunk/arch/alpha/kernel/irq.c b/trunk/arch/alpha/kernel/irq.c
index d0f1620007f7..c626a821cdcb 100644
--- a/trunk/arch/alpha/kernel/irq.c
+++ b/trunk/arch/alpha/kernel/irq.c
@@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq)
 	last_cpu = cpu;
 
 	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
-	irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
+	irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu));
 	return 0;
 }
 #endif /* CONFIG_SMP */
diff --git a/trunk/arch/alpha/kernel/process.c b/trunk/arch/alpha/kernel/process.c
index f238370c907d..351407e07e71 100644
--- a/trunk/arch/alpha/kernel/process.c
+++ b/trunk/arch/alpha/kernel/process.c
@@ -94,7 +94,6 @@ common_shutdown_1(void *generic_ptr)
 		flags |= 0x00040000UL; /* "remain halted" */
 		*pflags = flags;
 		cpu_clear(cpuid, cpu_present_map);
-		cpu_clear(cpuid, cpu_possible_map);
 		halt();
 	}
 #endif
@@ -121,7 +120,6 @@ common_shutdown_1(void *generic_ptr)
 #ifdef CONFIG_SMP
 	/* Wait for the secondaries to halt. */
 	cpu_clear(boot_cpuid, cpu_present_map);
-	cpu_clear(boot_cpuid, cpu_possible_map);
 	while (cpus_weight(cpu_present_map))
 		barrier();
 #endif
diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c
index d953e510f68d..cf7da10097bb 100644
--- a/trunk/arch/alpha/kernel/smp.c
+++ b/trunk/arch/alpha/kernel/smp.c
@@ -70,6 +70,11 @@ enum ipi_message_type {
 /* Set to a secondary's cpuid when it comes online.  */
 static int smp_secondary_alive __devinitdata = 0;
 
+/* Which cpus ids came online.  */
+cpumask_t cpu_online_map;
+
+EXPORT_SYMBOL(cpu_online_map);
+
 int smp_num_probed;		/* Internal processor count */
 int smp_num_cpus = 1;		/* Number that came online.  */
 EXPORT_SYMBOL(smp_num_cpus);
@@ -435,7 +440,6 @@ setup_smp(void)
 			((char *)cpubase + i*hwrpb->processor_size);
 		if ((cpu->flags & 0x1cc) == 0x1cc) {
 			smp_num_probed++;
-			cpu_set(i, cpu_possible_map);
 			cpu_set(i, cpu_present_map);
 			cpu->pal_revision = boot_cpu_palrev;
 		}
@@ -469,7 +473,6 @@ smp_prepare_cpus(unsigned int max_cpus)
 
 	/* Nothing to do on a UP box, or when told not to.  */
 	if (smp_num_probed == 1 || max_cpus == 0) {
-		cpu_possible_map = cpumask_of_cpu(boot_cpuid);
 		cpu_present_map = cpumask_of_cpu(boot_cpuid);
 		printk(KERN_INFO "SMP mode deactivated.\n");
 		return;
diff --git a/trunk/arch/alpha/kernel/sys_dp264.c b/trunk/arch/alpha/kernel/sys_dp264.c
index ab44c164d9d4..c71b0fd7a61f 100644
--- a/trunk/arch/alpha/kernel/sys_dp264.c
+++ b/trunk/arch/alpha/kernel/sys_dp264.c
@@ -177,19 +177,19 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 }
 
 static void
-dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
+dp264_set_affinity(unsigned int irq, cpumask_t affinity)
 {
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq, *affinity);
+	cpu_set_irq_affinity(irq, affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
 
 static void
-clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
+clipper_set_affinity(unsigned int irq, cpumask_t affinity)
 {
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq - 16, *affinity);
+	cpu_set_irq_affinity(irq - 16, affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
diff --git a/trunk/arch/alpha/kernel/sys_titan.c b/trunk/arch/alpha/kernel/sys_titan.c
index 27f840a4ad3d..52c91ccc1648 100644
--- a/trunk/arch/alpha/kernel/sys_titan.c
+++ b/trunk/arch/alpha/kernel/sys_titan.c
@@ -158,10 +158,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 }
 
 static void
-titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+titan_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 {
 	spin_lock(&titan_irq_lock);
-	titan_cpu_set_irq_affinity(irq - 16, *affinity);
+	titan_cpu_set_irq_affinity(irq - 16, affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
 }
diff --git a/trunk/arch/arm/common/gic.c b/trunk/arch/arm/common/gic.c
index c6884ba1d5ed..7fc9860a97d7 100644
--- a/trunk/arch/arm/common/gic.c
+++ b/trunk/arch/arm/common/gic.c
@@ -109,11 +109,11 @@ static void gic_unmask_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
+static void gic_set_cpu(unsigned int irq, cpumask_t mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
-	unsigned int cpu = cpumask_first(mask_val);
+	unsigned int cpu = first_cpu(mask_val);
 	u32 val;
 
 	spin_lock(&irq_controller_lock);
diff --git a/trunk/arch/arm/kernel/irq.c b/trunk/arch/arm/kernel/irq.c
index 7141cee1fab7..2f3eb795fa6e 100644
--- a/trunk/arch/arm/kernel/irq.c
+++ b/trunk/arch/arm/kernel/irq.c
@@ -174,7 +174,7 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu)
 	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->chip->set_affinity(irq, cpumask_of(cpu));
+	desc->chip->set_affinity(irq, cpumask_of_cpu(cpu));
 	spin_unlock_irq(&desc->lock);
 }
diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c
index 55fa7ff96a3e..019237d21622 100644
--- a/trunk/arch/arm/kernel/smp.c
+++ b/trunk/arch/arm/kernel/smp.c
@@ -33,6 +33,16 @@
 #include
 #include
 
+/*
+ * bitmask of present and online CPUs.
+ * The present bitmask indicates that the CPU is physically present.
+ * The online bitmask indicates that the CPU is up and running.
+ */
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
+cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
+
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
diff --git a/trunk/arch/arm/mach-at91/at91rm9200_time.c b/trunk/arch/arm/mach-at91/at91rm9200_time.c
index 1ff1bda0a894..d140eae53ded 100644
--- a/trunk/arch/arm/mach-at91/at91rm9200_time.c
+++ b/trunk/arch/arm/mach-at91/at91rm9200_time.c
@@ -178,6 +178,7 @@ static struct clock_event_device clkevt = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 150,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= clkevt32k_next_event,
 	.set_mode	= clkevt32k_mode,
 };
@@ -205,7 +206,7 @@ void __init at91rm9200_timer_init(void)
 	clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
 	clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
 	clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
-	clkevt.cpumask = cpumask_of(0);
+	clkevt.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clkevt);
 
 	/* register clocksource */
diff --git a/trunk/arch/arm/mach-at91/at91sam926x_time.c b/trunk/arch/arm/mach-at91/at91sam926x_time.c
index b63e1d5f1bad..122fd77ed580 100644
--- a/trunk/arch/arm/mach-at91/at91sam926x_time.c
+++ b/trunk/arch/arm/mach-at91/at91sam926x_time.c
@@ -91,6 +91,7 @@ static struct clock_event_device pit_clkevt = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 100,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_mode	= pit_clkevt_mode,
 };
 
@@ -172,7 +173,6 @@ static void __init at91sam926x_pit_init(void)
 
 	/* Set up and register clockevents */
 	pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift);
-	pit_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&pit_clkevt);
 }
diff --git a/trunk/arch/arm/mach-davinci/time.c b/trunk/arch/arm/mach-davinci/time.c
index f8bcd29d17a6..3b9a296b5c4b 100644
--- a/trunk/arch/arm/mach-davinci/time.c
+++ b/trunk/arch/arm/mach-davinci/time.c
@@ -322,7 +322,7 @@ static void __init davinci_timer_init(void)
 	clockevent_davinci.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_davinci);
 
-	clockevent_davinci.cpumask = cpumask_of(0);
+	clockevent_davinci.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clockevent_davinci);
 }
diff --git a/trunk/arch/arm/mach-imx/time.c b/trunk/arch/arm/mach-imx/time.c
index aff0ebcfa847..a11765f5f23b 100644
--- a/trunk/arch/arm/mach-imx/time.c
+++ b/trunk/arch/arm/mach-imx/time.c
@@ -184,7 +184,7 @@ static int __init imx_clockevent_init(unsigned long rate)
 	clockevent_imx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_imx);
 
-	clockevent_imx.cpumask = cpumask_of(0);
+	clockevent_imx.cpumask = cpumask_of_cpu(0);
 
 	clockevents_register_device(&clockevent_imx);
diff --git a/trunk/arch/arm/mach-ixp4xx/common.c b/trunk/arch/arm/mach-ixp4xx/common.c
index f4656d2ac8a8..7766f469456b 100644
--- a/trunk/arch/arm/mach-ixp4xx/common.c
+++ b/trunk/arch/arm/mach-ixp4xx/common.c
@@ -487,7 +487,7 @@ static int __init ixp4xx_clockevent_init(void)
 		clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx);
 	clockevent_ixp4xx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_ixp4xx);
-	clockevent_ixp4xx.cpumask = cpumask_of(0);
+	clockevent_ixp4xx.cpumask = cpumask_of_cpu(0);
 
 	clockevents_register_device(&clockevent_ixp4xx);
 	return 0;
diff --git a/trunk/arch/arm/mach-msm/timer.c b/trunk/arch/arm/mach-msm/timer.c
index 444d9c0f5ca6..345a14cb73c3 100644
--- a/trunk/arch/arm/mach-msm/timer.c
+++ b/trunk/arch/arm/mach-msm/timer.c
@@ -182,7 +182,7 @@ static void __init msm_timer_init(void)
 			clockevent_delta2ns(0xf0000000 >> clock->shift, ce);
 		/* 4 gets rounded down to 3 */
 		ce->min_delta_ns = clockevent_delta2ns(4, ce);
-		ce->cpumask = cpumask_of(0);
+		ce->cpumask = cpumask_of_cpu(0);
 
 		cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
 		res = clocksource_register(cs);
diff --git a/trunk/arch/arm/mach-ns9xxx/time-ns9360.c b/trunk/arch/arm/mach-ns9xxx/time-ns9360.c
index 41df69721769..a63424d083d9 100644
--- a/trunk/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/trunk/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -173,7 +173,7 @@ static void __init ns9360_timer_init(void)
 	ns9360_clockevent_device.min_delta_ns =
 		clockevent_delta2ns(1, &ns9360_clockevent_device);
 
-	ns9360_clockevent_device.cpumask = cpumask_of(0);
+	ns9360_clockevent_device.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&ns9360_clockevent_device);
 
 	setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT,
diff --git a/trunk/arch/arm/mach-omap1/time.c b/trunk/arch/arm/mach-omap1/time.c
index 495a32c287b4..2cf7e32bd293 100644
--- a/trunk/arch/arm/mach-omap1/time.c
+++ b/trunk/arch/arm/mach-omap1/time.c
@@ -173,7 +173,7 @@ static __init void omap_init_mpu_timer(unsigned long rate)
 	clockevent_mpu_timer1.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_mpu_timer1);
 
-	clockevent_mpu_timer1.cpumask = cpumask_of(0);
+	clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clockevent_mpu_timer1);
 }
diff --git a/trunk/arch/arm/mach-omap1/timer32k.c b/trunk/arch/arm/mach-omap1/timer32k.c
index fd3f7396e162..705367ece174 100644
--- a/trunk/arch/arm/mach-omap1/timer32k.c
+++ b/trunk/arch/arm/mach-omap1/timer32k.c
@@ -187,7 +187,7 @@ static __init void omap_init_32k_timer(void)
 	clockevent_32k_timer.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_32k_timer);
 
-	clockevent_32k_timer.cpumask = cpumask_of(0);
+	clockevent_32k_timer.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clockevent_32k_timer);
 }
diff --git a/trunk/arch/arm/mach-omap2/timer-gp.c b/trunk/arch/arm/mach-omap2/timer-gp.c
index ae6036300f60..589393bedade 100644
--- a/trunk/arch/arm/mach-omap2/timer-gp.c
+++ b/trunk/arch/arm/mach-omap2/timer-gp.c
@@ -120,7 +120,7 @@ static void __init omap2_gp_clockevent_init(void)
 	clockevent_gpt.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_gpt);
 
-	clockevent_gpt.cpumask = cpumask_of(0);
+	clockevent_gpt.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clockevent_gpt);
 }
diff --git a/trunk/arch/arm/mach-pxa/time.c b/trunk/arch/arm/mach-pxa/time.c
index 95656a72268d..001624158519 100644
--- a/trunk/arch/arm/mach-pxa/time.c
+++ b/trunk/arch/arm/mach-pxa/time.c
@@ -122,6 +122,7 @@ static struct clock_event_device ckevt_pxa_osmr0 = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= pxa_osmr0_set_next_event,
 	.set_mode	= pxa_osmr0_set_mode,
 };
@@ -162,7 +163,6 @@ static void __init pxa_timer_init(void)
 		clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
 	ckevt_pxa_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
-	ckevt_pxa_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_pxa_oscr0.mult =
 		clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --git a/trunk/arch/arm/mach-realview/core.c b/trunk/arch/arm/mach-realview/core.c
index bd2aa4f16141..5f1d55963ced 100644
--- a/trunk/arch/arm/mach-realview/core.c
+++ b/trunk/arch/arm/mach-realview/core.c
@@ -624,7 +624,7 @@ static struct clock_event_device timer0_clockevent = {
 	.set_mode	= timer_set_mode,
 	.set_next_event	= timer_set_next_event,
 	.rating		= 300,
-	.cpumask	= cpu_all_mask,
+	.cpumask	= CPU_MASK_ALL,
 };
 
 static void __init realview_clockevents_init(unsigned int timer_irq)
diff --git a/trunk/arch/arm/mach-realview/localtimer.c b/trunk/arch/arm/mach-realview/localtimer.c
index 67d6d9cc68b2..9019ef2e5611 100644
--- a/trunk/arch/arm/mach-realview/localtimer.c
+++ b/trunk/arch/arm/mach-realview/localtimer.c
@@ -154,7 +154,7 @@ void __cpuinit local_timer_setup(void)
 	clk->set_mode		= local_timer_set_mode;
 	clk->set_next_event	= local_timer_set_next_event;
 	clk->irq		= IRQ_LOCALTIMER;
-	clk->cpumask		= cpumask_of(cpu);
+	clk->cpumask		= cpumask_of_cpu(cpu);
 	clk->shift		= 20;
 	clk->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
 	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
@@ -193,7 +193,7 @@ void __cpuinit local_timer_setup(void)
 	clk->rating		= 200;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of(cpu);
+	clk->cpumask		= cpumask_of_cpu(cpu);
 
 	clockevents_register_device(clk);
 }
diff --git a/trunk/arch/arm/mach-sa1100/time.c b/trunk/arch/arm/mach-sa1100/time.c
index 711c0295c66f..8c5e727f3b75 100644
--- a/trunk/arch/arm/mach-sa1100/time.c
+++ b/trunk/arch/arm/mach-sa1100/time.c
@@ -73,6 +73,7 @@ static struct clock_event_device ckevt_sa1100_osmr0 = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= sa1100_osmr0_set_next_event,
 	.set_mode	= sa1100_osmr0_set_mode,
 };
@@ -109,7 +110,6 @@ static void __init sa1100_timer_init(void)
 		clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
 	ckevt_sa1100_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
-	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_sa1100_oscr.mult =
 		clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --git a/trunk/arch/arm/mach-versatile/core.c b/trunk/arch/arm/mach-versatile/core.c
index 1c43494f5c42..df25aa138509 100644
--- a/trunk/arch/arm/mach-versatile/core.c
+++ b/trunk/arch/arm/mach-versatile/core.c
@@ -1005,7 +1005,7 @@ static void __init versatile_timer_init(void)
 	timer0_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xf, &timer0_clockevent);
 
-	timer0_clockevent.cpumask = cpumask_of(0);
+	timer0_clockevent.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&timer0_clockevent);
 }
diff --git a/trunk/arch/arm/oprofile/op_model_mpcore.c b/trunk/arch/arm/oprofile/op_model_mpcore.c
index 6d6bd5899240..4de366e8b4c5 100644
--- a/trunk/arch/arm/oprofile/op_model_mpcore.c
+++ b/trunk/arch/arm/oprofile/op_model_mpcore.c
@@ -260,10 +260,10 @@ static void em_stop(void)
 static void em_route_irq(int irq, unsigned int cpu)
 {
 	struct irq_desc *desc = irq_desc + irq;
-	const struct cpumask *mask = cpumask_of(cpu);
+	cpumask_t mask = cpumask_of_cpu(cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->affinity = *mask;
+	desc->affinity = mask;
 	desc->chip->set_affinity(irq, mask);
 	spin_unlock_irq(&desc->lock);
 }
diff --git a/trunk/arch/arm/plat-mxc/time.c b/trunk/arch/arm/plat-mxc/time.c
index 758a1293bcfa..fd28f5194f71 100644
--- a/trunk/arch/arm/plat-mxc/time.c
+++ b/trunk/arch/arm/plat-mxc/time.c
@@ -190,7 +190,7 @@ static int __init mxc_clockevent_init(void)
 	clockevent_mxc.min_delta_ns =
 		clockevent_delta2ns(0xff, &clockevent_mxc);
 
-	clockevent_mxc.cpumask = cpumask_of(0);
+	clockevent_mxc.cpumask = cpumask_of_cpu(0);
 
 	clockevents_register_device(&clockevent_mxc);
diff --git a/trunk/arch/arm/plat-orion/time.c b/trunk/arch/arm/plat-orion/time.c
index 6fa2923e6dca..544d6b327f3a 100644
--- a/trunk/arch/arm/plat-orion/time.c
+++ b/trunk/arch/arm/plat-orion/time.c
@@ -149,6 +149,7 @@ static struct clock_event_device orion_clkevt = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 300,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= orion_clkevt_next_event,
 	.set_mode	= orion_clkevt_mode,
 };
@@ -198,6 +199,5 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 	orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift);
 	orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt);
 	orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt);
-	orion_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&orion_clkevt);
 }
diff --git a/trunk/arch/avr32/kernel/time.c b/trunk/arch/avr32/kernel/time.c
index 0ff46bf873b0..283481d74a5b 100644
--- a/trunk/arch/avr32/kernel/time.c
+++ b/trunk/arch/avr32/kernel/time.c
@@ -106,6 +106,7 @@ static struct clock_event_device comparator = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 16,
 	.rating		= 50,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= comparator_next_event,
 	.set_mode	= comparator_mode,
 };
@@ -133,7 +134,6 @@ void __init time_init(void)
 	comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift);
 	comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator);
 	comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1;
-	comparator.cpumask = cpumask_of(0);
 
 	sysreg_write(COMPARE, 0);
 	timer_irqaction.dev_id = &comparator;
diff --git a/trunk/arch/blackfin/kernel/time-ts.c b/trunk/arch/blackfin/kernel/time-ts.c
index 0ed2badfd746..e887efc86c29 100644
--- a/trunk/arch/blackfin/kernel/time-ts.c
+++ b/trunk/arch/blackfin/kernel/time-ts.c
@@ -162,6 +162,7 @@ static struct clock_event_device clockevent_bfin = {
 	.name		= "bfin_core_timer",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event = bfin_timer_set_next_event,
 	.set_mode	= bfin_timer_set_mode,
 };
@@ -192,7 +193,6 @@ static int __init bfin_clockevent_init(void)
 	clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
 	clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin);
 	clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin);
-	clockevent_bfin.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_bfin);
 
 	return 0;
diff --git a/trunk/arch/cris/arch-v32/kernel/irq.c b/trunk/arch/cris/arch-v32/kernel/irq.c
index 295131fee710..173c141ac9ba 100644
--- a/trunk/arch/cris/arch-v32/kernel/irq.c
+++ b/trunk/arch/cris/arch-v32/kernel/irq.c
@@ -325,11 +325,11 @@ static void end_crisv32_irq(unsigned int irq)
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
+void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&irq_lock, flags);
-	irq_allocations[irq - FIRST_IRQ].mask = *dest;
+	irq_allocations[irq - FIRST_IRQ].mask = dest;
 	spin_unlock_irqrestore(&irq_lock, flags);
 }
diff --git a/trunk/arch/cris/arch-v32/kernel/smp.c b/trunk/arch/cris/arch-v32/kernel/smp.c
index 9dac17334640..52e16c6436f9 100644
--- a/trunk/arch/cris/arch-v32/kernel/smp.c
+++ b/trunk/arch/cris/arch-v32/kernel/smp.c
@@ -29,7 +29,11 @@
 spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
 
 /* CPU masks */
+cpumask_t cpu_online_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_online_map);
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_SYMBOL(phys_cpu_present_map);
 
 /* Variables used during SMP boot */
diff --git a/trunk/arch/cris/include/asm/smp.h b/trunk/arch/cris/include/asm/smp.h
index c615a06dd757..dba33aba3e95 100644
--- a/trunk/arch/cris/include/asm/smp.h
+++ b/trunk/arch/cris/include/asm/smp.h
@@ -4,6 +4,7 @@
 #include
 
 extern cpumask_t phys_cpu_present_map;
+extern cpumask_t cpu_possible_map;
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
diff --git a/trunk/arch/ia64/hp/sim/hpsim_irq.c b/trunk/arch/ia64/hp/sim/hpsim_irq.c
index cc0a3182db3c..c2f58ff364e7 100644
--- a/trunk/arch/ia64/hp/sim/hpsim_irq.c
+++ b/trunk/arch/ia64/hp/sim/hpsim_irq.c
@@ -22,7 +22,7 @@ hpsim_irq_noop (unsigned int irq)
 }
 
 static void
-hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
+hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
 {
 }
diff --git a/trunk/arch/ia64/include/asm/kvm.h b/trunk/arch/ia64/include/asm/kvm.h
index 68aa6da807c1..f38472ac2267 100644
--- a/trunk/arch/ia64/include/asm/kvm.h
+++ b/trunk/arch/ia64/include/asm/kvm.h
@@ -166,6 +166,8 @@ struct saved_vpd {
 };
 
 struct kvm_regs {
+	char *saved_guest;
+	char *saved_stack;
 	struct saved_vpd vpd;
 	/*Arch-regs*/
 	int mp_state;
@@ -198,10 +200,6 @@ struct kvm_regs {
 	unsigned long fp_psr;	/*used for lazy float register */
 	unsigned long saved_gp;
 	/*for phycial emulation */
-
-	union context saved_guest;
-
-	unsigned long reserved[64];	/* for future use */
 };
 
 struct kvm_sregs {
diff --git a/trunk/arch/ia64/include/asm/kvm_host.h b/trunk/arch/ia64/include/asm/kvm_host.h
index 0560f3fae538..c60d324da540 100644
--- a/trunk/arch/ia64/include/asm/kvm_host.h
+++ b/trunk/arch/ia64/include/asm/kvm_host.h
@@ -23,6 +23,17 @@
 #ifndef __ASM_KVM_HOST_H
 #define __ASM_KVM_HOST_H
 
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define KVM_MAX_VCPUS 4
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
@@ -39,132 +50,70 @@
 #define EXIT_REASON_EXTERNAL_INTERRUPT	6
 #define EXIT_REASON_IPI			7
 #define EXIT_REASON_PTC_G		8
-#define EXIT_REASON_DEBUG		20
 
 /*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
+#define KVM_VMM_SIZE (16UL<<20)
 #define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000
-#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
+#define KVM_VMM_BASE 0xD000000000000000UL
+#define VMM_SIZE (8UL<<20)
 
 /*
  * Define vm_buffer, used by PAL Services, base address.
- * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
+ * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M
  */
 #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * kvm guest's data area looks as follow:
- *
- *            +----------------------+	-------	KVM_VM_DATA_SIZE
- *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
- *	      |			     |	 |    /			  |
- *	      |	       ..........    |	 |   /vcpu's struct&stack |
- *	      |	       ..........    |	 |  /---------------------|---- 0
- *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
- *	      |	    vcpu[4]'s data   |	 |/-----------------------|
- *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
- *	      |	    vcpu[2]'s data   |	/|------------------------|
- *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
- *	      |	    vcpu[0]'s data   |____________________________|
- *            +----------------------+	 |
- *	      |	   memory dirty log  |	 |
- *            +----------------------+	 |
- *	      |	   vm's data struct  |	 |
- *            +----------------------+	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |	  vm's p2m table  |	 |
- *	      |			     |	 |
- *            |			     |	 |
- *	      |			     |	 |  |
- * vm's data->|			     |   |  |
- *	      +----------------------+ ------- 0
- * To support large memory, needs to increase the size of p2m.
- * To support more vcpus, needs to ensure it has enough space to
- * hold vcpus' data.
- */
-
-#define KVM_VM_DATA_SHIFT	26
-#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
-
-#define KVM_P2M_BASE		KVM_VM_DATA_BASE
-#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
-
-#define VHPT_SHIFT		16
-#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
-
-#define VTLB_SHIFT		16
-#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
-
-#define VPD_SHIFT		16
-#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
-
-#define VCPU_STRUCT_SHIFT	16
-#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-
-#define KVM_STK_OFFSET		VCPU_STRUCT_SIZE
-
-#define KVM_VM_STRUCT_SHIFT	19
-#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
-
-#define KVM_MEM_DIRY_LOG_SHIFT	19
-#define KVM_MEM_DIRTY_LOG_SIZE	(__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-/*Define the max vcpus and memory for Guests.*/
-#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
-			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
-#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
-
-#define VMM_LOG_LEN 256
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-struct kvm_vcpu_data {
-	char vcpu_vhpt[VHPT_SIZE];
-	char vcpu_vtlb[VTLB_SIZE];
-	char vcpu_vpd[VPD_SIZE];
-	char vcpu_struct[VCPU_STRUCT_SIZE];
-};
-
-struct kvm_vm_data {
-	char kvm_p2m[KVM_P2M_SIZE];
-	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
-	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
-	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
-};
-
-#define VCPU_BASE(n)	KVM_VM_DATA_BASE + \
-			offsetof(struct kvm_vm_data, vcpu_data[n])
-#define VM_BASE		KVM_VM_DATA_BASE + \
-			offsetof(struct kvm_vm_data, kvm_vm_struct)
-#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
-			offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
-
-#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
-#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
-#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
-#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
-			offsetof(struct kvm_vcpu_data, vcpu_struct))
+#define KVM_VM_BUFFER_SIZE (8UL<<20)
+
+/*Define Virtual machine data layout.*/
+#define KVM_VM_DATA_SHIFT  24
+#define KVM_VM_DATA_SIZE   (1UL << KVM_VM_DATA_SHIFT)
+#define KVM_VM_DATA_BASE   (KVM_VMM_BASE + KVM_VMM_SIZE)
+
+
+#define KVM_P2M_BASE    KVM_VM_DATA_BASE
+#define KVM_P2M_OFS     0
+#define KVM_P2M_SIZE    (8UL << 20)
+
+#define KVM_VHPT_BASE   (KVM_P2M_BASE + KVM_P2M_SIZE)
+#define KVM_VHPT_OFS    KVM_P2M_SIZE
+#define KVM_VHPT_BLOCK_SIZE   (2UL << 20)
+#define VHPT_SHIFT 18
+#define VHPT_SIZE  (1UL << VHPT_SHIFT)
+#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5))
+
+#define KVM_VTLB_BASE   (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE)
+#define KVM_VTLB_OFS    (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE)
+#define KVM_VTLB_BLOCK_SIZE   (1UL<<20)
+#define VTLB_SHIFT 17
+#define VTLB_SIZE (1UL<= nr_cpu_ids)
+		cpus_and(mask, mask, cpu_online_map);
+	if (cpus_empty(mask))
 		return;
 
-	if (irq_prepare_move(irq, cpu))
+	if (irq_prepare_move(irq, first_cpu(mask)))
 		return;
 
-	dest = cpu_physical_id(cpu);
+	dest = cpu_physical_id(first_cpu(mask));
 
 	if (!iosapic_intr_info[irq].count)
 		return;			/* not an IOSAPIC interrupt */
diff --git a/trunk/arch/ia64/kernel/irq.c b/trunk/arch/ia64/kernel/irq.c
index 0b6db53fedcf..7fd18f54c056 100644
--- a/trunk/arch/ia64/kernel/irq.c
+++ b/trunk/arch/ia64/kernel/irq.c
@@ -133,6 +133,7 @@ unsigned int vectors_in_migration[NR_IRQS];
  */
 static void migrate_irqs(void)
 {
+	cpumask_t	mask;
 	irq_desc_t *desc;
 	int 		irq, new_cpu;
 
@@ -151,14 +152,15 @@ static void migrate_irqs(void)
 		if (desc->status == IRQ_PER_CPU)
 			continue;
 
-		if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask)
-		    >= nr_cpu_ids) {
+		cpus_and(mask, irq_desc[irq].affinity, cpu_online_map);
+		if (any_online_cpu(mask) == NR_CPUS) {
 			/*
 			 * Save it for phase 2 processing
 			 */
 			vectors_in_migration[irq] = irq;
 
 			new_cpu = any_online_cpu(cpu_online_map);
+			mask = cpumask_of_cpu(new_cpu);
 
 			/*
 			 * Al three are essential, currently WARN_ON.. maybe panic?
@@ -166,8 +168,7 @@
 			if (desc->chip && desc->chip->disable &&
 				desc->chip->enable && desc->chip->set_affinity) {
 				desc->chip->disable(irq);
-				desc->chip->set_affinity(irq,
-							 cpumask_of(new_cpu));
+				desc->chip->set_affinity(irq, mask);
 				desc->chip->enable(irq);
 			} else {
 				WARN_ON((!(desc->chip) || !(desc->chip->disable) ||
diff --git a/trunk/arch/ia64/kernel/msi_ia64.c b/trunk/arch/ia64/kernel/msi_ia64.c
index 890339339035..702a09c13238 100644
--- a/trunk/arch/ia64/kernel/msi_ia64.c
+++ b/trunk/arch/ia64/kernel/msi_ia64.c
@@ -49,12 +49,11 @@ static struct irq_chip ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq,
-				      const cpumask_t *cpu_mask)
+static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
 {
 	struct msi_msg msg;
 	u32 addr, data;
-	int cpu = first_cpu(*cpu_mask);
+	int cpu = first_cpu(cpu_mask);
 
 	if (!cpu_online(cpu))
 		return;
@@ -167,11 +166,12 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	struct msi_msg msg;
-	int cpu = cpumask_first(mask);
+	int cpu = first_cpu(mask);
+
 	if (!cpu_online(cpu))
 		return;
@@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
 
 	dmar_msi_write(irq, &msg);
-	irq_desc[irq].affinity = *mask;
+	irq_desc[irq].affinity = mask;
 }
 #endif /* CONFIG_SMP */
diff --git a/trunk/arch/ia64/kernel/smpboot.c b/trunk/arch/ia64/kernel/smpboot.c
index 11463994a7d5..1dcbb85fc4ee 100644
--- a/trunk/arch/ia64/kernel/smpboot.c
+++ b/trunk/arch/ia64/kernel/smpboot.c
@@ -131,6 +131,12 @@ struct task_struct *task_for_booting_cpu;
  */
 DEFINE_PER_CPU(int, cpu_state);
 
+/* Bitmasks of currently online, and possible CPUs */
+cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
+cpumask_t cpu_possible_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_possible_map);
+
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
@@ -682,7 +688,7 @@ int migrate_platform_irqs(unsigned int cpu)
 {
 	int new_cpei_cpu;
 	irq_desc_t *desc = NULL;
-	const struct cpumask *mask;
+	cpumask_t 	mask;
 	int 		retval = 0;
 
 	/*
@@ -695,7 +701,7 @@ int migrate_platform_irqs(unsigned int cpu)
 		 * Now re-target the CPEI to a different processor
 		 */
 		new_cpei_cpu = any_online_cpu(cpu_online_map);
-		mask = cpumask_of(new_cpei_cpu);
+		mask = cpumask_of_cpu(new_cpei_cpu);
 		set_cpei_target_cpu(new_cpei_cpu);
 		desc = irq_desc + ia64_cpe_irq;
 		/*
diff --git a/trunk/arch/ia64/kernel/topology.c b/trunk/arch/ia64/kernel/topology.c
index a8d61a3e9a94..c75b914f2d6b 100644
--- a/trunk/arch/ia64/kernel/topology.c
+++ b/trunk/arch/ia64/kernel/topology.c
@@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
 	cpumask_t shared_cpu_map;
 
 	cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
-	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
+	len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
 	len += sprintf(buf+len, "\n");
 	return len;
 }
diff --git a/trunk/arch/ia64/kvm/Makefile b/trunk/arch/ia64/kvm/Makefile
index 76464dc312e6..92cef66ca268 100644
--- a/trunk/arch/ia64/kvm/Makefile
+++ b/trunk/arch/ia64/kvm/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
 
 CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
 kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-	vtlb.o process.o kvm_lib.o
+	vtlb.o process.o
 #Add link memcpy and memset to avoid possible structure assignment error
 kvm-intel-objs += memcpy.o memset.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/trunk/arch/ia64/kvm/asm-offsets.c b/trunk/arch/ia64/kvm/asm-offsets.c
index 0c3564a7a033..4e3dc13a619c 100644
--- a/trunk/arch/ia64/kvm/asm-offsets.c
+++ b/trunk/arch/ia64/kvm/asm-offsets.c
@@ -24,10 +24,19 @@
 
 #include
 #include
-#include
 
 #include "vcpu.h"
 
+#define task_struct kvm_vcpu
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : :)
+
+#define OFFSET(_sym, _str, _mem) \
+    DEFINE(_sym, offsetof(_str, _mem));
+
 void foo(void)
 {
 	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
diff --git a/trunk/arch/ia64/kvm/kvm-ia64.c b/trunk/arch/ia64/kvm/kvm-ia64.c
index 0f5ebd948437..af1464f7a6ad 100644
--- a/trunk/arch/ia64/kvm/kvm-ia64.c
+++ b/trunk/arch/ia64/kvm/kvm-ia64.c
@@ -180,6 +180,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
+	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_MP_STATE:
 
 		r = 1;
@@ -438,6 +439,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 	expires = div64_u64(itc_diff, cyc_per_usec);
 	kt = ktime_set(0, 1000 * expires);
 
+	down_read(&vcpu->kvm->slots_lock);
 	vcpu->arch.ht_active = 1;
 	hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
 
@@ -450,6 +452,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 		if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+	up_read(&vcpu->kvm->slots_lock);
 
 	if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
 		return -EINTR;
@@ -473,13 +476,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
-				struct kvm_run *kvm_run)
-{
-	printk("VMM: %s", vcpu->arch.log_buf);
-	return 1;
-}
-
 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
 		struct kvm_run *kvm_run) = {
 	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
@@ -491,7 +487,6 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
 	[EXIT_REASON_IPI]                   = handle_ipi,
 	[EXIT_REASON_PTC_G]                 = handle_global_purge,
-	[EXIT_REASON_DEBUG]                 = handle_vcpu_debug,
 };
 
@@ -703,24 +698,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return r;
 }
 
+/*
+ * Allocate 16M memory for every vm to hold its specific data.
+ * Its memory map is defined in kvm_host.h.
+ */
 static struct kvm *kvm_alloc_kvm(void)
 {
 
 	struct kvm *kvm;
 	uint64_t  vm_base;
 
-	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
-
 	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
 
 	if (!vm_base)
 		return ERR_PTR(-ENOMEM);
+	printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
+
 	/* Zero all pages before use! */
 	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-	kvm = (struct kvm *)(vm_base +
-			offsetof(struct kvm_vm_data, kvm_vm_struct));
+
+	kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
 	kvm->arch.vm_base = vm_base;
-	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
 
 	return kvm;
 }
@@ -762,12 +760,21 @@ static void kvm_build_io_pmt(struct kvm *kvm)
 
 static void kvm_init_vm(struct kvm *kvm)
 {
+	long vm_base;
+
 	BUG_ON(!kvm);
 
 	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
 	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
 	kvm->arch.vmm_init_rr = VMM_INIT_RR;
 
+	vm_base = kvm->arch.vm_base;
+	if (vm_base) {
+		kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
+		kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
+		kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
+	}
+
 	/*
 	 *Fill P2M entries for MMIO/IO ranges
 	 */
@@ -831,8 +838,9 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 	int i;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int r;
 
 	vcpu_load(vcpu);
 
@@ -849,7 +857,18 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
 	vpd->vpr = regs->vpd.vpr;
 
-	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
+	r = -EFAULT;
+	r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
+						sizeof(union context));
+	if (r)
+		goto out;
+	r = copy_from_user(vcpu + 1, regs->saved_stack +
+			sizeof(struct kvm_vcpu),
+			IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
+	if (r)
+		goto out;
+	vcpu->arch.exit_data =
+		((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
 
 	RESTORE_REGS(mp_state);
 	RESTORE_REGS(vmm_rr);
@@ -883,8 +902,9 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
 	vcpu_put(vcpu);
-
-	return 0;
+	r = 0;
+out:
+	return r;
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -1146,11 +1166,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	/*Set entry address for first run.*/
 	regs->cr_iip = PALE_RESET_ENTRY;
 
-	/*Initialize itc offset for vcpus*/
+	/*Initilize itc offset for vcpus*/
 	itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
-		v = (struct kvm_vcpu *)((char *)vcpu +
-				sizeof(struct kvm_vcpu_data) * i);
+	for (i = 0; i < MAX_VCPU_NUM; i++) {
+		v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
 		v->arch.itc_offset = itc_offset;
 		v->arch.last_itc = 0;
 	}
@@ -1164,7 +1183,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.apic->vcpu = vcpu;
 
 	p_ctx->gr[1] = 0;
-	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
+	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
 	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
 	p_ctx->psr = 0x1008522000UL;
 	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1199,12 +1218,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.hlt_timer.function = hlt_timer_fn;
 
 	vcpu->arch.last_run_cpu = -1;
-	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
+	vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
 	vcpu->arch.vsa_base = kvm_vsa_base;
 	vcpu->arch.__gp = kvm_vmm_gp;
 	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
-	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
+	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
 	init_ptce_info(vcpu);
 
 	r = 0;
@@ -1254,22 +1273,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	int r;
 	int cpu;
 
-	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
-
-	r = -EINVAL;
-	if (id >= KVM_MAX_VCPUS) {
-		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
-				KVM_MAX_VCPUS);
-		goto fail;
-	}
-
 	r = -ENOMEM;
 	if (!vm_base) {
 		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
 		goto fail;
 	}
-	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
-					vcpu_data[id].vcpu_struct));
+	vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
 	vcpu->kvm = kvm;
 
 	cpu = get_cpu();
@@ -1365,9 +1374,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 	int i;
-
+	int r;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 	vcpu_load(vcpu);
 
 	for (i = 0; i < 16; i++) {
@@ -1382,8 +1391,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	regs->vpd.vpsr = vpd->vpsr;
 	regs->vpd.vpr = vpd->vpr;
 
-	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
-
+	r = -EFAULT;
+	r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
+					sizeof(union context));
+	if (r)
+		goto out;
+	r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
+	if (r)
+		goto out;
 	SAVE_REGS(mp_state);
 	SAVE_REGS(vmm_rr);
 	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1411,9 +1426,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	SAVE_REGS(metaphysical_saved_rr4);
 	SAVE_REGS(fp_psr);
 	SAVE_REGS(saved_gp);
-	vcpu_put(vcpu);
-	return 0;
+	vcpu_put(vcpu);
+	r = 0;
+out:
+	return r;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1441,9 +1457,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
 	unsigned long base_gfn = memslot->base_gfn;
 
-	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
-		return -ENOMEM;
-
 	for (i = 0; i < npages; i++) {
 		pfn = gfn_to_pfn(kvm, base_gfn + i);
 		if (!kvm_is_mmio_pfn(pfn)) {
@@ -1618,8 +1631,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int r, i;
 	long n, base;
-	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
-			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
+	unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
+					+ KVM_MEM_DIRTY_LOG_OFS);
 
 	r = -EINVAL;
 	if (log->slot >= KVM_MEMORY_SLOTS)
diff --git a/trunk/arch/ia64/kvm/kvm_lib.c b/trunk/arch/ia64/kvm/kvm_lib.c
deleted file mode 100644
index a85cb611ecd7..000000000000
--- a/trunk/arch/ia64/kvm/kvm_lib.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * kvm_lib.c: Compile some libraries for kvm-intel module.
- *
- * Just include kernel's library, and disable symbols export.
- * Copyright (C) 2008, Intel Corporation.
- * Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#undef CONFIG_MODULES
-#include "../../../lib/vsprintf.c"
-#include "../../../lib/ctype.c"
diff --git a/trunk/arch/ia64/kvm/kvm_minstate.h b/trunk/arch/ia64/kvm/kvm_minstate.h
index b2bcaa2787aa..2cc41d17cf99 100644
--- a/trunk/arch/ia64/kvm/kvm_minstate.h
+++ b/trunk/arch/ia64/kvm/kvm_minstate.h
@@ -24,8 +24,6 @@
 #include
 #include
 #include
-#include
-
 #include "asm-offsets.h"
 
 #define KVM_MINSTATE_START_SAVE_MIN					\
@@ -35,7 +33,7 @@
 	addl r22 = VMM_RBS_OFFSET,r1;	/* compute base of RBS */	\
 	;;								\
 	lfetch.fault.excl.nt1 [r22];					\
-	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;			\
+	addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1;  /* compute base of memory stack */  \
 	mov r23 = ar.bspstore;		/* save ar.bspstore */		\
 	;;								\
 	mov ar.bspstore = r22;		/* switch to kernel RBS */	\
diff --git a/trunk/arch/ia64/kvm/misc.h b/trunk/arch/ia64/kvm/misc.h
index dd979e00b574..e585c4607344 100644
--- a/trunk/arch/ia64/kvm/misc.h
+++ b/trunk/arch/ia64/kvm/misc.h
@@ -27,8 +27,7 @@
  */
 static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
 {
-	return (uint64_t *)(kvm->arch.vm_base +
-				offsetof(struct kvm_vm_data, kvm_p2m));
+	return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
 }
 
 static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/trunk/arch/ia64/kvm/mmio.c b/trunk/arch/ia64/kvm/mmio.c
index 21f63fffc379..7f1a858bc69f 100644
--- a/trunk/arch/ia64/kvm/mmio.c
+++ b/trunk/arch/ia64/kvm/mmio.c
@@ -66,25 +66,31 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
 
 	switch (addr) {
 	case PIB_OFST_INTA:
-		panic_vm(v, "Undefined write on PIB INTA\n");
+		/*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
+		panic_vm(v);
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			vlsapic_write_xtp(v, val);
 		} else {
-			panic_vm(v, "Undefined write on PIB XTP\n");
+			/*panic_domain(NULL,
+				"Undefined write on PIB XTP\n");*/
+			panic_vm(v);
 		}
 		break;
 	default:
 		if (PIB_LOW_HALF(addr)) {
-			/*Lower half */
+			/*lower half */
 			if (length != 8)
-				panic_vm(v, "Can't LHF write with size %ld!\n",
-						length);
+				/*panic_domain(NULL,
+				"Can't LHF write with size %ld!\n",
+				length);*/
+				panic_vm(v);
 			else
 				vlsapic_write_ipi(v, addr, val);
-		} else {	/*Upper half */
-			panic_vm(v, "IPI-UHF write %lx\n", addr);
+		} else {	/* upper half
+				printk("IPI-UHF write %lx\n",addr);*/
+			panic_vm(v);
 		}
 		break;
 	}
@@ -102,18 +108,22 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
 		if (length == 1) /* 1 byte load */
 			; /* There is no i8259, there is no INTA access*/
 		else
-			panic_vm(v, "Undefined read on PIB INTA\n");
+			/*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
+			panic_vm(v);
 
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			result = VLSAPIC_XTP(v);
+			/* printk("read xtp %lx\n", result); */
 		} else {
-			panic_vm(v, "Undefined read on PIB XTP\n");
+			/*panic_domain(NULL,
+				"Undefined read on PIB XTP\n");*/
+			panic_vm(v);
 		}
 		break;
 	default:
-		panic_vm(v, "Undefined addr access for lsapic!\n");
+		panic_vm(v);
 		break;
 	}
 	return result;
@@ -152,7 +162,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
 		/* it's necessary to ensure zero extending */
 		*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
 	} else
-		panic_vm(vcpu, "Unhandled mmio access returned!\n");
+		panic_vm(vcpu);
 out:
 	local_irq_restore(psr);
 	return ;
@@ -314,9 +324,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
 		return;
 	} else {
 		inst_type = -1;
-		panic_vm(vcpu, "Unsupported MMIO access instruction! \
-		Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
-			bundle.i64[0], bundle.i64[1]);
+		panic_vm(vcpu);
 	}
 
 	size = 1 << size;
@@ -327,7 +335,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
 		if (inst_type == SL_INTEGER)
 			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
 		else
-			panic_vm(vcpu, "Unsupported instruction type!\n");
+			panic_vm(vcpu);
 
 	}
 	vcpu_increment_iip(vcpu);
diff --git a/trunk/arch/ia64/kvm/process.c b/trunk/arch/ia64/kvm/process.c
index 552d07724207..800817307b7b 100644
--- a/trunk/arch/ia64/kvm/process.c
+++ b/trunk/arch/ia64/kvm/process.c
@@ -527,8 +527,7 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
 	vector = vec2off[vec];
 
 	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
-				"with psr.ic = 0\n", vector);
+		panic_vm(vcpu);
 		return;
 	}
 
@@ -587,7 +586,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
 		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
 		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
 	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
+		panic_vm(vcpu);
 }
 
 static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -615,7 +614,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
 		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
 		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
 	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
+		panic_vm(vcpu);
 }
 
 void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -681,7 +680,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
 	vpsr = VCPU(vcpu, vpsr);
 	isr = vpsr & IA64_PSR_RI;
 	if (!(vpsr & IA64_PSR_IC))
-		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
+		panic_vm(vcpu);
 	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
 }
 
@@ -942,20 +941,8 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
 	ia64_set_pta(vcpu->arch.vhpt.pta.val);
 }
 
-static void vmm_sanity_check(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
-		panic_vm(vcpu, "Failed to do vmm sanity check,"
-			"it maybe caused by crashed vmm!!\n\n");
-	}
-}
-
 static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
 {
-	vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
-
 	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
 		vcpu_do_resume(vcpu);
 		return;
@@ -981,11 +968,3 @@ void vmm_transition(struct kvm_vcpu *vcpu)
 						1, 0, 0, 0, 0, 0);
 	kvm_do_resume_op(vcpu);
 }
-
-void vmm_panic_handler(u64 vec)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	vmm_sanity = 0;
-	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
-			vec2off[vec]);
-}
diff --git a/trunk/arch/ia64/kvm/vcpu.c b/trunk/arch/ia64/kvm/vcpu.c
index ecd526b55323..e44027ce5667 100644
--- a/trunk/arch/ia64/kvm/vcpu.c
+++ b/trunk/arch/ia64/kvm/vcpu.c
@@ -816,9 +816,8 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 	unsigned long vitv = VCPU(vcpu, itv);
 
 	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < KVM_MAX_VCPUS; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu +
-					sizeof(struct kvm_vcpu_data) * i);
+		for (i = 0; i < MAX_VCPU_NUM; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
 			VMX(v, itc_offset) = itc_offset;
 			VMX(v, last_itc) = 0;
 		}
@@ -1651,8 +1650,7 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
 	 * Otherwise panic
 	 */
 	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
-				& vpsr.is=0\n");
+		panic_vm(vcpu);
 
 	/*
 	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2105,7 +2103,7 @@ void
 kvm_init_all_rr(struct kvm_vcpu *vcpu)
 
 	if (is_physical_mode(vcpu)) {
 		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-			panic_vm(vcpu, "Machine Status conflicts!\n");
+			panic_vm(vcpu);
 
 		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
 		ia64_dv_serialize_data();
@@ -2154,70 +2152,10 @@ int vmm_entry(void)
 	return 0;
 }
 
-static void kvm_show_registers(struct kvm_pt_regs *regs)
-{
-	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-
-	struct kvm_vcpu *vcpu = current_vcpu;
-	if (vcpu != NULL)
-		printk("vcpu 0x%p vcpu %d\n",
-		       vcpu, vcpu->vcpu_id);
-
-	printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
-	       regs->cr_ipsr, regs->cr_ifs, ip);
-
-	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
-	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
-	printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
-	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
-	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
-	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
-	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
-	printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
-	       regs->b6, regs->b7);
-	printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
-	       regs->f6.u.bits[1], regs->f6.u.bits[0],
-	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
-	printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
-	       regs->f8.u.bits[1], regs->f8.u.bits[0],
-	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
-	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
-	       regs->f10.u.bits[1], regs->f10.u.bits[0],
-	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
-
-	printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
-	       regs->r2, regs->r3);
-	printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
-	       regs->r9, regs->r10);
-	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
-	       regs->r12, regs->r13);
-	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
-	       regs->r15, regs->r16);
-	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
-	       regs->r18, regs->r19);
-	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
-	       regs->r21, regs->r22);
-	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
-	       regs->r24, regs->r25);
-	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
-	       regs->r27, regs->r28);
-	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
-	       regs->r30, regs->r31);
-
-}
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
-{
-	va_list args;
-	char buf[256];
-
-	struct kvm_pt_regs *regs = vcpu_regs(v);
+void panic_vm(struct kvm_vcpu *v)
+{
 	struct exit_ctl_data *p = &v->arch.exit_data;
-	va_start(args, fmt);
-	vsnprintf(buf, sizeof(buf), fmt, args);
-	va_end(args);
-	printk(buf);
-	kvm_show_registers(regs);
+
 	p->exit_reason = EXIT_REASON_VM_PANIC;
 	vmm_transition(v);
 	/*Never to return*/
diff --git a/trunk/arch/ia64/kvm/vcpu.h b/trunk/arch/ia64/kvm/vcpu.h
index b2f12a562bdf..e9b2a4e121c0 100644
--- a/trunk/arch/ia64/kvm/vcpu.h
+++ b/trunk/arch/ia64/kvm/vcpu.h
@@ -737,12 +737,9 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
 void kvm_init_vhpt(struct kvm_vcpu *v);
 void thash_init(struct thash_cb *hcb, u64 sz);
 
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
+void panic_vm(struct kvm_vcpu *v);
 
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
 		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-
-extern long vmm_sanity;
-
 #endif
 #endif	/* __VCPU_H__ */
diff --git a/trunk/arch/ia64/kvm/vmm.c b/trunk/arch/ia64/kvm/vmm.c
index 9eee5c04bacc..2275bf4e681a 100644
--- a/trunk/arch/ia64/kvm/vmm.c
+++ b/trunk/arch/ia64/kvm/vmm.c
@@ -20,7 +20,6 @@
  */
 
 
-#include
 #include
 #include
 
@@ -32,8 +31,6 @@ MODULE_LICENSE("GPL");
 extern char kvm_ia64_ivt;
 extern fpswa_interface_t *vmm_fpswa_interface;
 
-long vmm_sanity = 1;
-
 struct kvm_vmm_info vmm_info = {
 	.module			= THIS_MODULE,
 	.vmm_entry		= vmm_entry,
@@ -65,31 +62,5 @@ void vmm_spin_unlock(spinlock_t *lock)
 {
 	_vmm_raw_spin_unlock(lock);
 }
-
-static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	long psr;
-
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_DEBUG;
-	vmm_transition(vcpu);
-	local_irq_restore(psr);
-}
-
-asmlinkage int printk(const char *fmt, ...)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	va_list args;
-	int r;
-
-	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
-	va_start(args, fmt);
-	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
-	va_end(args);
-	vcpu_debug_exit(vcpu);
-	return r;
-}
-
 module_init(kvm_vmm_init)
 module_exit(kvm_vmm_exit)
diff --git a/trunk/arch/ia64/kvm/vmm_ivt.S b/trunk/arch/ia64/kvm/vmm_ivt.S
index 3ef1a017a318..c1d7251a1480 100644
--- a/trunk/arch/ia64/kvm/vmm_ivt.S
+++ b/trunk/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
 /*
- * arch/ia64/kvm/vmm_ivt.S
+ * /ia64/kvm_ivt.S
  *
  * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
  *	Stephane Eranian
@@ -70,39 +70,32 @@
 # define PSR_DEFAULT_BITS   0
 #endif
 
+
 #define KVM_FAULT(n)    \
-	kvm_fault_##n:;          \
-	mov r19=n;;          \
-	br.sptk.many kvm_vmm_panic;         \
-	;;                  \
+    kvm_fault_##n:;          \
+    mov r19=n;;          \
+    br.sptk.many kvm_fault_##n;         \
+    ;;                  \
+
 
 #define KVM_REFLECT(n)    \
-	mov r31=pr;           \
-	mov r19=n;       /* prepare to save predicates */ \
-	mov r29=cr.ipsr;      \
-	;;      \
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7) br.sptk.many kvm_dispatch_reflection;        \
-	br.sptk.many kvm_vmm_panic;      \
-
-GLOBAL_ENTRY(kvm_vmm_panic)
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,1,0
-	mov out0=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i    // guarantee that interruption collection is on
-	;;
-	//(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	br.call.sptk.many b6=vmm_panic_handler;
-END(kvm_vmm_panic)
+    mov r31=pr;           \
+    mov r19=n;       /* prepare to save predicates */ \
+    mov r29=cr.ipsr;      \
+    ;;      \
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7)br.sptk.many kvm_dispatch_reflection;        \
+    br.sptk.many kvm_panic;      \
+
+
+GLOBAL_ENTRY(kvm_panic)
+    br.sptk.many kvm_panic
+    ;;
+END(kvm_panic) + + + + .section .text.ivt,"ax" @@ -112,307 +105,308 @@ kvm_ia64_ivt: /////////////////////////////////////////////////////////////// // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) ENTRY(kvm_vhpt_miss) - KVM_FAULT(0) + KVM_FAULT(0) END(kvm_vhpt_miss) + .org kvm_ia64_ivt+0x400 //////////////////////////////////////////////////////////////// // 0x0400 Entry 1 (size 64 bundles) ITLB (21) ENTRY(kvm_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_itlb_miss - mov r19 = 1 - br.sptk kvm_itlb_miss_dispatch - KVM_FAULT(1); + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; + (p6) br.sptk kvm_alt_itlb_miss + mov r19 = 1 + br.sptk kvm_itlb_miss_dispatch + KVM_FAULT(1); END(kvm_itlb_miss) .org kvm_ia64_ivt+0x0800 ////////////////////////////////////////////////////////////////// // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) ENTRY(kvm_dtlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_dtlb_miss - br.sptk kvm_dtlb_miss_dispatch + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6)br.sptk kvm_alt_dtlb_miss + br.sptk kvm_dtlb_miss_dispatch END(kvm_dtlb_miss) .org kvm_ia64_ivt+0x0c00 //////////////////////////////////////////////////////////////////// // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) ENTRY(kvm_alt_itlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - mov r24=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r17,r19 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r17,r19 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi END(kvm_alt_itlb_miss) .org kvm_ia64_ivt+0x1000 ///////////////////////////////////////////////////////////////////// // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) ENTRY(kvm_alt_dtlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r24=cr.ipsr - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r19,r17 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r19,r17 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi END(kvm_alt_dtlb_miss) .org kvm_ia64_ivt+0x1400 ////////////////////////////////////////////////////////////////////// // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) ENTRY(kvm_nested_dtlb_miss) - KVM_FAULT(5) + KVM_FAULT(5) END(kvm_nested_dtlb_miss) .org kvm_ia64_ivt+0x1800 
///////////////////////////////////////////////////////////////////// // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) ENTRY(kvm_ikey_miss) - KVM_REFLECT(6) + KVM_REFLECT(6) END(kvm_ikey_miss) .org kvm_ia64_ivt+0x1c00 ///////////////////////////////////////////////////////////////////// // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) ENTRY(kvm_dkey_miss) - KVM_REFLECT(7) + KVM_REFLECT(7) END(kvm_dkey_miss) .org kvm_ia64_ivt+0x2000 //////////////////////////////////////////////////////////////////// // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) ENTRY(kvm_dirty_bit) - KVM_REFLECT(8) + KVM_REFLECT(8) END(kvm_dirty_bit) .org kvm_ia64_ivt+0x2400 //////////////////////////////////////////////////////////////////// // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) ENTRY(kvm_iaccess_bit) - KVM_REFLECT(9) + KVM_REFLECT(9) END(kvm_iaccess_bit) .org kvm_ia64_ivt+0x2800 /////////////////////////////////////////////////////////////////// // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) ENTRY(kvm_daccess_bit) - KVM_REFLECT(10) + KVM_REFLECT(10) END(kvm_daccess_bit) .org kvm_ia64_ivt+0x2c00 ///////////////////////////////////////////////////////////////// // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) ENTRY(kvm_break_fault) - mov r31=pr - mov r19=11 - mov r29=cr.ipsr - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!) - mov out0=cr.ifa - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15)ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out1=16,sp - br.call.sptk.many b6=kvm_ia64_handle_break - ;; + mov r31=pr + mov r19=11 + mov r29=cr.ipsr + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) 
+ mov out0=cr.ifa + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15)ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out1=16,sp + br.call.sptk.many b6=kvm_ia64_handle_break + ;; END(kvm_break_fault) .org kvm_ia64_ivt+0x3000 ///////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) ENTRY(kvm_interrupt) - mov r31=pr // prepare to save predicates - mov r19=12 - mov r29=cr.ipsr - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT - tbit.z p0,p15=r29,IA64_PSR_I_BIT - ;; -(p7) br.sptk kvm_dispatch_interrupt - ;; - mov r27=ar.rsc /* M */ - mov r20=r1 /* A */ - mov r25=ar.unat /* M */ - mov r26=ar.pfs /* I */ - mov r28=cr.iip /* M */ - cover /* B (or nothing) */ - ;; - mov r1=sp - ;; - invala /* M */ - mov r30=cr.ifs - ;; - addl r1=-VMM_PT_REGS_SIZE,r1 - ;; - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ - adds r16=PT(CR_IPSR),r1 - ;; - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES - st8 [r16]=r29 /* save cr.ipsr */ - ;; - lfetch.fault.excl.nt1 [r17] - mov r29=b0 - ;; - adds r16=PT(R8),r1 /* initialize first base pointer */ - adds r17=PT(R9),r1 /* initialize second base pointer */ - mov r18=r0 /* make sure r18 isn't NaT */ - ;; + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk kvm_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-VMM_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; .mem.offset 0,0; st8.spill [r16]=r8,16 .mem.offset 8,0; st8.spill [r17]=r9,16 ;; .mem.offset 0,0; st8.spill [r16]=r10,24 .mem.offset 8,0; st8.spill [r17]=r11,24 ;; - st8 [r16]=r28,16 /* save cr.iip */ - st8 [r17]=r30,16 /* save cr.ifs */ - mov r8=ar.fpsr /* M */ - mov r9=ar.csd - mov r10=ar.ssd - movl r11=FPSR_DEFAULT /* L-unit */ - ;; - st8 [r16]=r25,16 /* save ar.unat */ - st8 [r17]=r26,16 /* save ar.pfs */ - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ - ;; - st8 [r16]=r27,16 /* save ar.rsc */ - adds r17=16,r17 /* skip over ar_rnat field */ - ;; - st8 [r17]=r31,16 /* save predicates */ - adds r16=16,r16 /* skip over ar_bspstore field */ - ;; - st8 [r16]=r29,16 /* save b0 */ - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ - ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; + st8 [r17]=r31,16 /* save predicates */ + adds 
r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; .mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ .mem.offset 8,0; st8.spill [r17]=r12,16 - adds r12=-16,r1 - /* switch to kernel memory stack (with 16 bytes of scratch) */ - ;; + adds r12=-16,r1 + /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; .mem.offset 0,0; st8.spill [r16]=r13,16 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - ;; + ;; .mem.offset 0,0; st8.spill [r16]=r15,16 .mem.offset 8,0; st8.spill [r17]=r14,16 - dep r14=-1,r0,60,4 - ;; + dep r14=-1,r0,60,4 + ;; .mem.offset 0,0; st8.spill [r16]=r2,16 .mem.offset 8,0; st8.spill [r17]=r3,16 - adds r2=VMM_PT_REGS_R16_OFFSET,r1 - adds r14 = VMM_VCPU_GP_OFFSET,r13 - ;; - mov r8=ar.ccv - ld8 r14 = [r14] - ;; - mov r1=r14 /* establish kernel global pointer */ - ;; \ - bsw.1 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - mov out0=r13 - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; + adds r2=VMM_PT_REGS_R16_OFFSET,r1 + adds r14 = VMM_VCPU_GP_OFFSET,r13 + ;; + mov r8=ar.ccv + ld8 r14 = [r14] + ;; + mov r1=r14 /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + mov out0=r13 + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; .mem.offset 0,0; st8.spill [r2]=r16,16 .mem.offset 8,0; st8.spill [r3]=r17,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r18,16 .mem.offset 8,0; st8.spill [r3]=r19,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r20,16 .mem.offset 8,0; st8.spill [r3]=r21,16 - mov r18=b6 - ;; + mov r18=b6 + ;; .mem.offset 0,0; st8.spill [r2]=r22,16 .mem.offset 8,0; st8.spill [r3]=r23,16 - mov r19=b7 - ;; + mov r19=b7 + ;; .mem.offset 0,0; st8.spill [r2]=r24,16 .mem.offset 8,0; st8.spill [r3]=r25,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r26,16 .mem.offset 8,0; st8.spill [r3]=r27,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r28,16 .mem.offset 8,0; st8.spill [r3]=r29,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r30,16 .mem.offset 8,0; st8.spill [r3]=r31,32 - ;; - mov ar.fpsr=r11 /* M-unit */ - st8 [r2]=r8,8 /* ar.ccv */ - adds r24=PT(B6)-PT(F7),r3 - ;; - stf.spill [r2]=f6,32 - stf.spill [r3]=f7,32 - ;; - stf.spill [r2]=f8,32 - stf.spill [r3]=f9,32 - ;; - stf.spill [r2]=f10 - stf.spill [r3]=f11 - adds r25=PT(B7)-PT(F11),r3 - ;; - st8 [r24]=r18,16 /* b6 */ - st8 [r25]=r19,16 /* b7 */ - ;; - st8 [r24]=r9 /* ar.csd */ - st8 [r25]=r10 /* ar.ssd */ - ;; - srlz.d // make sure we see the effect of cr.ivr - addl r14=@gprel(ia64_leave_nested),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_ia64_handle_irq - ;; + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + addl r14=@gprel(ia64_leave_nested),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_ia64_handle_irq + ;; END(kvm_interrupt) .global kvm_dispatch_vexirq @@ -420,385 +414,387 @@ 
END(kvm_interrupt) ////////////////////////////////////////////////////////////////////// // 0x3400 Entry 13 (size 64 bundles) Reserved ENTRY(kvm_virtual_exirq) - mov r31=pr - mov r19=13 - mov r30 =r0 - ;; + mov r31=pr + mov r19=13 + mov r30 =r0 + ;; kvm_dispatch_vexirq: - cmp.eq p6,p0 = 1,r30 - ;; -(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; -(p6) ld8 r1 = [r29] - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r13 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - KVM_SAVE_REST - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_vexirq + cmp.eq p6,p0 = 1,r30 + ;; +(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; +(p6)ld8 r1 = [r29] + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + KVM_SAVE_REST + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_vexirq END(kvm_virtual_exirq) .org kvm_ia64_ivt+0x3800 ///////////////////////////////////////////////////////////////////// // 0x3800 Entry 14 (size 64 bundles) Reserved - KVM_FAULT(14) - // this code segment is from 2.6.16.13 + KVM_FAULT(14) + // this code segment is from 2.6.16.13 + .org kvm_ia64_ivt+0x3c00 /////////////////////////////////////////////////////////////////////// // 0x3c00 Entry 15 (size 64 bundles) Reserved - KVM_FAULT(15) + KVM_FAULT(15) + .org kvm_ia64_ivt+0x4000 /////////////////////////////////////////////////////////////////////// // 0x4000 Entry 16 (size 64 bundles) Reserved - KVM_FAULT(16) + KVM_FAULT(16) .org kvm_ia64_ivt+0x4400 ////////////////////////////////////////////////////////////////////// // 0x4400 Entry 17 (size 64 bundles) Reserved - KVM_FAULT(17) + KVM_FAULT(17) .org kvm_ia64_ivt+0x4800 ////////////////////////////////////////////////////////////////////// // 0x4800 Entry 18 (size 64 bundles) Reserved - KVM_FAULT(18) + KVM_FAULT(18) .org kvm_ia64_ivt+0x4c00 ////////////////////////////////////////////////////////////////////// // 0x4c00 Entry 19 (size 64 bundles) Reserved - KVM_FAULT(19) + KVM_FAULT(19) .org kvm_ia64_ivt+0x5000 ////////////////////////////////////////////////////////////////////// // 0x5000 Entry 20 (size 16 bundles) Page Not Present ENTRY(kvm_page_not_present) - KVM_REFLECT(20) + KVM_REFLECT(20) END(kvm_page_not_present) .org kvm_ia64_ivt+0x5100 /////////////////////////////////////////////////////////////////////// // 0x5100 Entry 21 (size 16 bundles) Key Permission vector ENTRY(kvm_key_permission) - KVM_REFLECT(21) + KVM_REFLECT(21) END(kvm_key_permission) .org kvm_ia64_ivt+0x5200 ////////////////////////////////////////////////////////////////////// // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(kvm_iaccess_rights) - KVM_REFLECT(22) + KVM_REFLECT(22) END(kvm_iaccess_rights) .org kvm_ia64_ivt+0x5300 ////////////////////////////////////////////////////////////////////// // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(kvm_daccess_rights) - KVM_REFLECT(23) + KVM_REFLECT(23) END(kvm_daccess_rights) .org kvm_ia64_ivt+0x5400 ///////////////////////////////////////////////////////////////////// // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) ENTRY(kvm_general_exception) - KVM_REFLECT(24) - KVM_FAULT(24) + 
KVM_REFLECT(24) + KVM_FAULT(24) END(kvm_general_exception) .org kvm_ia64_ivt+0x5500 ////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) ENTRY(kvm_disabled_fp_reg) - KVM_REFLECT(25) + KVM_REFLECT(25) END(kvm_disabled_fp_reg) .org kvm_ia64_ivt+0x5600 //////////////////////////////////////////////////////////////////// // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) ENTRY(kvm_nat_consumption) - KVM_REFLECT(26) + KVM_REFLECT(26) END(kvm_nat_consumption) .org kvm_ia64_ivt+0x5700 ///////////////////////////////////////////////////////////////////// // 0x5700 Entry 27 (size 16 bundles) Speculation (40) ENTRY(kvm_speculation_vector) - KVM_REFLECT(27) + KVM_REFLECT(27) END(kvm_speculation_vector) .org kvm_ia64_ivt+0x5800 ///////////////////////////////////////////////////////////////////// // 0x5800 Entry 28 (size 16 bundles) Reserved - KVM_FAULT(28) + KVM_FAULT(28) .org kvm_ia64_ivt+0x5900 /////////////////////////////////////////////////////////////////// // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) ENTRY(kvm_debug_vector) - KVM_FAULT(29) + KVM_FAULT(29) END(kvm_debug_vector) .org kvm_ia64_ivt+0x5a00 /////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) ENTRY(kvm_unaligned_access) - KVM_REFLECT(30) + KVM_REFLECT(30) END(kvm_unaligned_access) .org kvm_ia64_ivt+0x5b00 ////////////////////////////////////////////////////////////////////// // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) ENTRY(kvm_unsupported_data_reference) - KVM_REFLECT(31) + KVM_REFLECT(31) END(kvm_unsupported_data_reference) .org kvm_ia64_ivt+0x5c00 //////////////////////////////////////////////////////////////////// // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) ENTRY(kvm_floating_point_fault) - KVM_REFLECT(32) + KVM_REFLECT(32) END(kvm_floating_point_fault) .org kvm_ia64_ivt+0x5d00 ///////////////////////////////////////////////////////////////////// // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) ENTRY(kvm_floating_point_trap) - KVM_REFLECT(33) + KVM_REFLECT(33) END(kvm_floating_point_trap) .org kvm_ia64_ivt+0x5e00 ////////////////////////////////////////////////////////////////////// // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) ENTRY(kvm_lower_privilege_trap) - KVM_REFLECT(34) + KVM_REFLECT(34) END(kvm_lower_privilege_trap) .org kvm_ia64_ivt+0x5f00 ////////////////////////////////////////////////////////////////////// // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) ENTRY(kvm_taken_branch_trap) - KVM_REFLECT(35) + KVM_REFLECT(35) END(kvm_taken_branch_trap) .org kvm_ia64_ivt+0x6000 //////////////////////////////////////////////////////////////////// // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) ENTRY(kvm_single_step_trap) - KVM_REFLECT(36) + KVM_REFLECT(36) END(kvm_single_step_trap) .global kvm_virtualization_fault_back .org kvm_ia64_ivt+0x6100 ///////////////////////////////////////////////////////////////////// // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault ENTRY(kvm_virtualization_fault) - mov r31=pr - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - st8 [r16] = r1 - adds r17 = VMM_VCPU_GP_OFFSET, r21 - ;; - ld8 r1 = [r17] - cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 - cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 - cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 - cmp.eq p9,p0=EVENT_RSM,r24 - cmp.eq p10,p0=EVENT_SSM,r24 - cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 - cmp.eq 
p12,p0=EVENT_THASH,r24 -(p6) br.dptk.many kvm_asm_mov_from_ar -(p7) br.dptk.many kvm_asm_mov_from_rr -(p8) br.dptk.many kvm_asm_mov_to_rr -(p9) br.dptk.many kvm_asm_rsm -(p10) br.dptk.many kvm_asm_ssm -(p11) br.dptk.many kvm_asm_mov_to_psr -(p12) br.dptk.many kvm_asm_thash - ;; + mov r31=pr + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + st8 [r16] = r1 + adds r17 = VMM_VCPU_GP_OFFSET, r21 + ;; + ld8 r1 = [r17] + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 + cmp.eq p9,p0=EVENT_RSM,r24 + cmp.eq p10,p0=EVENT_SSM,r24 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 + cmp.eq p12,p0=EVENT_THASH,r24 + (p6) br.dptk.many kvm_asm_mov_from_ar + (p7) br.dptk.many kvm_asm_mov_from_rr + (p8) br.dptk.many kvm_asm_mov_to_rr + (p9) br.dptk.many kvm_asm_rsm + (p10) br.dptk.many kvm_asm_ssm + (p11) br.dptk.many kvm_asm_mov_to_psr + (p12) br.dptk.many kvm_asm_thash + ;; kvm_virtualization_fault_back: - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 = [r16] - ;; - mov r19=37 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - cmp.ne p6,p0=EVENT_RFI, r24 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] - ;; - adds r18=VMM_VPD_VIFS_OFFSET,r18 - ;; - ld8 r18=[r18] - ;; - tbit.z p6,p0=r18,63 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; -//if vifs.v=1 desert current register frame - alloc r18=ar.pfs,0,0,0,0 - br.sptk kvm_dispatch_virtualization_fault + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 = [r16] + ;; + mov r19=37 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + cmp.ne p6,p0=EVENT_RFI, r24 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] + ;; + adds r18=VMM_VPD_VIFS_OFFSET,r18 + ;; + ld8 r18=[r18] + ;; + tbit.z p6,p0=r18,63 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + //if vifs.v=1 desert current register frame + alloc r18=ar.pfs,0,0,0,0 + br.sptk kvm_dispatch_virtualization_fault END(kvm_virtualization_fault) .org kvm_ia64_ivt+0x6200 ////////////////////////////////////////////////////////////// // 0x6200 Entry 38 (size 16 bundles) Reserved - KVM_FAULT(38) + KVM_FAULT(38) .org kvm_ia64_ivt+0x6300 ///////////////////////////////////////////////////////////////// // 0x6300 Entry 39 (size 16 bundles) Reserved - KVM_FAULT(39) + KVM_FAULT(39) .org kvm_ia64_ivt+0x6400 ///////////////////////////////////////////////////////////////// // 0x6400 Entry 40 (size 16 bundles) Reserved - KVM_FAULT(40) + KVM_FAULT(40) .org kvm_ia64_ivt+0x6500 ////////////////////////////////////////////////////////////////// // 0x6500 Entry 41 (size 16 bundles) Reserved - KVM_FAULT(41) + KVM_FAULT(41) .org kvm_ia64_ivt+0x6600 ////////////////////////////////////////////////////////////////// // 0x6600 Entry 42 (size 16 bundles) Reserved - KVM_FAULT(42) + KVM_FAULT(42) .org kvm_ia64_ivt+0x6700 ////////////////////////////////////////////////////////////////// // 0x6700 Entry 43 (size 16 bundles) Reserved - KVM_FAULT(43) + KVM_FAULT(43) .org kvm_ia64_ivt+0x6800 ////////////////////////////////////////////////////////////////// // 0x6800 Entry 44 (size 16 bundles) Reserved - KVM_FAULT(44) + KVM_FAULT(44) .org kvm_ia64_ivt+0x6900 /////////////////////////////////////////////////////////////////// // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception 
//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) ENTRY(kvm_ia32_exception) - KVM_FAULT(45) + KVM_FAULT(45) END(kvm_ia32_exception) .org kvm_ia64_ivt+0x6a00 //////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) ENTRY(kvm_ia32_intercept) - KVM_FAULT(47) + KVM_FAULT(47) END(kvm_ia32_intercept) .org kvm_ia64_ivt+0x6c00 ///////////////////////////////////////////////////////////////////// // 0x6c00 Entry 48 (size 16 bundles) Reserved - KVM_FAULT(48) + KVM_FAULT(48) .org kvm_ia64_ivt+0x6d00 ////////////////////////////////////////////////////////////////////// // 0x6d00 Entry 49 (size 16 bundles) Reserved - KVM_FAULT(49) + KVM_FAULT(49) .org kvm_ia64_ivt+0x6e00 ////////////////////////////////////////////////////////////////////// // 0x6e00 Entry 50 (size 16 bundles) Reserved - KVM_FAULT(50) + KVM_FAULT(50) .org kvm_ia64_ivt+0x6f00 ///////////////////////////////////////////////////////////////////// // 0x6f00 Entry 51 (size 16 bundles) Reserved - KVM_FAULT(52) + KVM_FAULT(52) .org kvm_ia64_ivt+0x7100 //////////////////////////////////////////////////////////////////// // 0x7100 Entry 53 (size 16 bundles) Reserved - KVM_FAULT(53) + KVM_FAULT(53) .org kvm_ia64_ivt+0x7200 ///////////////////////////////////////////////////////////////////// // 0x7200 Entry 54 (size 16 bundles) Reserved - KVM_FAULT(54) + KVM_FAULT(54) .org kvm_ia64_ivt+0x7300 //////////////////////////////////////////////////////////////////// // 0x7300 Entry 55 (size 16 bundles) Reserved - KVM_FAULT(55) + KVM_FAULT(55) .org kvm_ia64_ivt+0x7400 //////////////////////////////////////////////////////////////////// // 0x7400 Entry 56 (size 16 bundles) Reserved - KVM_FAULT(56) + KVM_FAULT(56) .org kvm_ia64_ivt+0x7500 ///////////////////////////////////////////////////////////////////// // 0x7500 Entry 57 (size 16 bundles) Reserved - KVM_FAULT(57) + KVM_FAULT(57) .org kvm_ia64_ivt+0x7600 ///////////////////////////////////////////////////////////////////// // 0x7600 Entry 58 (size 16 bundles) Reserved - KVM_FAULT(58) + KVM_FAULT(58) .org kvm_ia64_ivt+0x7700 //////////////////////////////////////////////////////////////////// // 0x7700 Entry 59 (size 16 bundles) Reserved - KVM_FAULT(59) + KVM_FAULT(59) .org kvm_ia64_ivt+0x7800 //////////////////////////////////////////////////////////////////// // 0x7800 Entry 60 (size 16 bundles) Reserved - KVM_FAULT(60) + KVM_FAULT(60) .org kvm_ia64_ivt+0x7900 ///////////////////////////////////////////////////////////////////// // 0x7900 Entry 61 (size 16 bundles) Reserved - KVM_FAULT(61) + KVM_FAULT(61) .org kvm_ia64_ivt+0x7a00 ///////////////////////////////////////////////////////////////////// // 0x7a00 Entry 62 (size 16 bundles) Reserved - KVM_FAULT(62) + KVM_FAULT(62) .org kvm_ia64_ivt+0x7b00 ///////////////////////////////////////////////////////////////////// // 0x7b00 Entry 63 (size 16 bundles) Reserved - KVM_FAULT(63) + KVM_FAULT(63) .org kvm_ia64_ivt+0x7c00 //////////////////////////////////////////////////////////////////// // 0x7c00 Entry 64 (size 16 bundles) Reserved - KVM_FAULT(64) + KVM_FAULT(64) .org kvm_ia64_ivt+0x7d00 ///////////////////////////////////////////////////////////////////// // 0x7d00 Entry 65 (size 16 bundles) Reserved - KVM_FAULT(65) + KVM_FAULT(65) .org kvm_ia64_ivt+0x7e00 ///////////////////////////////////////////////////////////////////// // 0x7e00 Entry 66 (size 16 bundles) Reserved - KVM_FAULT(66) + KVM_FAULT(66) .org kvm_ia64_ivt+0x7f00 
//////////////////////////////////////////////////////////////////// // 0x7f00 Entry 67 (size 16 bundles) Reserved - KVM_FAULT(67) + KVM_FAULT(67) .org kvm_ia64_ivt+0x8000 // There is no particular reason for this code to be here, other than that @@ -808,128 +804,132 @@ END(kvm_ia32_intercept) ENTRY(kvm_dtlb_miss_dispatch) - mov r19 = 2 - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault + mov r19 = 2 + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault END(kvm_dtlb_miss_dispatch) ENTRY(kvm_itlb_miss_dispatch) - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault END(kvm_itlb_miss_dispatch) ENTRY(kvm_dispatch_reflection) -/* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved predicates (pr) - */ - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out2=cr.iim - mov out3=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out4=16,r12 - br.call.sptk.many b6=reflect_interruption + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out4=16,r12 + br.call.sptk.many b6=reflect_interruption END(kvm_dispatch_reflection) ENTRY(kvm_dispatch_virtualization_fault) - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!) 
- mov out0=r13 //vcpu - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out1=16,sp //regs - br.call.sptk.many b6=kvm_emulate + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + mov out0=r13 //vcpu + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out1=16,sp //regs + br.call.sptk.many b6=kvm_emulate END(kvm_dispatch_virtualization_fault) ENTRY(kvm_dispatch_interrupt) - KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - mov out0=r13 // pass pointer to pt_regs as second arg - br.call.sptk.many b6=kvm_ia64_handle_irq + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + //mov out0=cr.ivr // pass cr.ivr as first arg + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + mov out0=r13 // pass pointer to pt_regs as second arg + br.call.sptk.many b6=kvm_ia64_handle_irq END(kvm_dispatch_interrupt) + + + GLOBAL_ENTRY(ia64_leave_nested) rsm psr.i ;; @@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested) ;; ldf.fill f11=[r2] // mov r18=r13 -// mov r21=r13 +// mov r21=r13 adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 ;; @@ -1058,135 +1058,138 @@ GLOBAL_ENTRY(ia64_leave_nested) rfi END(ia64_leave_nested) + + GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) -/* - * work.need_resched etc. mustn't get changed - *by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on: - */ - adds r2 = PT(R4)+16,r12 - adds r3 = PT(R5)+16,r12 - adds r8 = PT(EML_UNAT)+16,r12 - ;; - ld8 r8 = [r8] - ;; - mov ar.unat=r8 - ;; - ld8.fill r4=[r2],16 //load r4 - ld8.fill r5=[r3],16 //load r5 - ;; - ld8.fill r6=[r2] //load r6 - ld8.fill r7=[r3] //load r7 - ;; + /* + * work.need_resched etc. 
mustn't get changed + *by this CPU before it returns to + ;; + * user- or fsys-mode, hence we disable interrupts early on: + */ + adds r2 = PT(R4)+16,r12 + adds r3 = PT(R5)+16,r12 + adds r8 = PT(EML_UNAT)+16,r12 + ;; + ld8 r8 = [r8] + ;; + mov ar.unat=r8 + ;; + ld8.fill r4=[r2],16 //load r4 + ld8.fill r5=[r3],16 //load r5 + ;; + ld8.fill r6=[r2] //load r6 + ld8.fill r7=[r3] //load r7 + ;; END(ia64_leave_hypervisor_prepare) //fall through GLOBAL_ENTRY(ia64_leave_hypervisor) - rsm psr.i - ;; - br.call.sptk.many b0=leave_hypervisor_tail - ;; - adds r20=PT(PR)+16,r12 - adds r8=PT(EML_UNAT)+16,r12 - ;; - ld8 r8=[r8] - ;; - mov ar.unat=r8 - ;; - lfetch [r20],PT(CR_IPSR)-PT(PR) - adds r2 = PT(B6)+16,r12 - adds r3 = PT(B7)+16,r12 - ;; - lfetch [r20] - ;; - ld8 r24=[r2],16 /* B6 */ - ld8 r25=[r3],16 /* B7 */ - ;; - ld8 r26=[r2],16 /* ar_csd */ - ld8 r27=[r3],16 /* ar_ssd */ - mov b6 = r24 - ;; - ld8.fill r8=[r2],16 - ld8.fill r9=[r3],16 - mov b7 = r25 - ;; - mov ar.csd = r26 - mov ar.ssd = r27 - ;; - ld8.fill r10=[r2],PT(R15)-PT(R10) - ld8.fill r11=[r3],PT(R14)-PT(R11) - ;; - ld8.fill r15=[r2],PT(R16)-PT(R15) - ld8.fill r14=[r3],PT(R17)-PT(R14) - ;; - ld8.fill r16=[r2],16 - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - ;; - ld8.fill r22=[r2],16 - ld8.fill r23=[r3],16 - ;; - ld8.fill r24=[r2],16 - ld8.fill r25=[r3],16 - ;; - ld8.fill r26=[r2],16 - ld8.fill r27=[r3],16 - ;; - ld8.fill r28=[r2],16 - ld8.fill r29=[r3],16 - ;; - ld8.fill r30=[r2],PT(F6)-PT(R30) - ld8.fill r31=[r3],PT(F7)-PT(R31) - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i // ensure interruption collection is off - ;; - bsw.0 - ;; - adds r16 = PT(CR_IPSR)+16,r12 - adds r17 = PT(CR_IIP)+16,r12 - mov r21=r13 // get current - ;; - ld8 r31=[r16],16 // load cr.ipsr - ld8 r30=[r17],16 // load cr.iip - ;; - ld8 r29=[r16],16 // load cr.ifs - ld8 r28=[r17],16 // load ar.unat - ;; - ld8 r27=[r16],16 // load ar.pfs - ld8 r26=[r17],16 // load ar.rsc - ;; - ld8 r25=[r16],16 // load ar.rnat - ld8 r24=[r17],16 // load ar.bspstore - ;; - ld8 r23=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 //load r1 - ;; - ld8.fill r12=[r16],16 //load r12 - ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 - ;; - ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr - ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 - ;; - ld8.fill r3=[r16] //load r3 - ld8 r18=[r17] //load ar_ccv - ;; - mov ar.fpsr=r19 - mov ar.ccv=r18 - shr.u r18=r20,16 - ;; + rsm psr.i + ;; + br.call.sptk.many b0=leave_hypervisor_tail + ;; + adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r2 = PT(B6)+16,r12 + adds r3 = PT(B7)+16,r12 + ;; + lfetch [r20] + ;; + ld8 r24=[r2],16 /* B6 */ + ld8 r25=[r3],16 /* B7 */ + ;; + ld8 r26=[r2],16 /* ar_csd */ + ld8 r27=[r3],16 /* ar_ssd */ + mov b6 = r24 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + mov b7 = r25 + ;; + mov ar.csd = r26 + mov ar.ssd = r27 + ;; + ld8.fill r10=[r2],PT(R15)-PT(R10) + ld8.fill r11=[r3],PT(R14)-PT(R11) + ;; + ld8.fill r15=[r2],PT(R16)-PT(R15) + ld8.fill r14=[r3],PT(R17)-PT(R14) + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill 
r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],PT(F6)-PT(R30) + ld8.fill r31=[r3],PT(F7)-PT(R31) + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i // ensure interruption collection is off + ;; + bsw.0 + ;; + adds r16 = PT(CR_IPSR)+16,r12 + adds r17 = PT(CR_IIP)+16,r12 + mov r21=r13 // get current + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat + ld8 r24=[r17],16 // load ar.bspstore + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 //load r1 + ;; + ld8.fill r12=[r16],16 //load r12 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 + ;; + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 + ;; + ld8.fill r3=[r16] //load r3 + ld8 r18=[r17] //load ar_ccv + ;; + mov ar.fpsr=r19 + mov ar.ccv=r18 + shr.u r18=r20,16 + ;; kvm_rbs_switch: - mov r19=96 + mov r19=96 kvm_dont_preserve_current_frame: /* @@ -1198,76 +1201,76 @@ kvm_dont_preserve_current_frame: # define pReturn p7 # define Nregs 14 - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r19 - mov in1=0 - ;; - TEXT_ALIGN(32) + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) kvm_rse_clear_invalid: - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 - // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 + // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 - // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 + // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 (pReturn) br.ret.dptk.many b0 # undef pRecurse # undef pReturn // loadrs has already been shifted - alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; - mov ar.bspstore=r24 - ;; - mov ar.unat=r28 - mov ar.rnat=r25 - mov ar.rsc=r26 - ;; - mov cr.ipsr=r31 - mov cr.iip=r30 - mov cr.ifs=r29 - mov ar.pfs=r27 - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] //vpd - adds r17=VMM_VCPU_ISR_OFFSET,r21 - ;; - ld8 r17=[r17] - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] //vpsr - mov r25=r18 - adds r16= VMM_VCPU_GP_OFFSET,r21 - ;; - ld8 r16= [r16] // Put gp in r24 - movl 
r24=@gprel(ia64_vmm_entry) // calculate return address - ;; - add r24=r24,r16 - ;; - br.sptk.many kvm_vps_sync_write // call the service - ;; + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; + mov ar.bspstore=r24 + ;; + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + mov ar.pfs=r27 + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] //vpd + adds r17=VMM_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] //vpsr + mov r25=r18 + adds r16= VMM_VCPU_GP_OFFSET,r21 + ;; + ld8 r16= [r16] // Put gp in r24 + movl r24=@gprel(ia64_vmm_entry) // calculate return address + ;; + add r24=r24,r16 + ;; + br.sptk.many kvm_vps_sync_write // call the service + ;; END(ia64_leave_hypervisor) // fall through GLOBAL_ENTRY(ia64_vmm_entry) @@ -1280,14 +1283,16 @@ GLOBAL_ENTRY(ia64_vmm_entry) * r22:b0 * r23:predicate */ - mov r24=r22 - mov r25=r18 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic -(p1) br.cond.sptk.few kvm_vps_resume_normal -(p2) br.cond.sptk.many kvm_vps_resume_handler - ;; + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + (p1) br.cond.sptk.few kvm_vps_resume_normal + (p2) br.cond.sptk.many kvm_vps_resume_handler + ;; END(ia64_vmm_entry) + + /* * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, * u64 arg3, u64 arg4, u64 arg5, @@ -1305,88 +1310,88 @@ psrsave = loc2 entry = loc3 hostret = r24 - alloc pfssave=ar.pfs,4,4,0,0 - mov rpsave=rp - adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 - ;; - ld8 entry=[entry] -1: mov hostret=ip - mov r25=in1 // copy arguments - mov r26=in2 - mov r27=in3 - mov psrsave=psr - ;; - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC - ;; - add hostret=2f-1b,hostret // calculate return address - add entry=entry,in0 - ;; - rsm psr.i | psr.ic - ;; - srlz.i - mov b6=entry - br.cond.sptk b6 // call the service + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 + ;; + ld8 entry=[entry] +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.i + mov b6=entry + br.cond.sptk b6 // call the service 2: -// Architectural sequence for enabling interrupts if necessary + // Architectural sequence for enabling interrupts if necessary (p7) ssm psr.ic - ;; + ;; (p7) srlz.i - ;; + ;; //(p6) ssm psr.i - ;; - mov rp=rpsave - mov ar.pfs=pfssave - mov r8=r31 - ;; - srlz.d - br.ret.sptk rp + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp END(ia64_call_vsa) #define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) GLOBAL_ENTRY(vmm_reset_entry) - //set up ipsr, iip, vpd.vpsr, dcr - // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 - // For DCR: all bits 0 - bsw.0 - ;; - mov r21 =r13 - adds r14=-VMM_PT_REGS_SIZE, r12 - ;; - movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 - movl r10=0x8000000000000000 - adds r16=PT(CR_IIP), r14 - adds r20=PT(R1), r14 - ;; - rsm psr.ic | psr.i - ;; - srlz.i - ;; - mov ar.rsc = 0 - ;; - flushrs - ;; - mov ar.bspstore = 0 - // clear BSPSTORE - ;; - mov cr.ipsr=r6 - mov cr.ifs=r10 - ld8 r4 = [r16] // Set init iip for first run. 
- ld8 r1 = [r20] - ;; - mov cr.iip=r4 - adds r16=VMM_VPD_BASE_OFFSET,r13 - ;; - ld8 r18=[r16] - ;; - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] - mov r17=r0 - mov r22=r0 - mov r23=r0 - br.cond.sptk ia64_vmm_entry - br.ret.sptk b0 + //set up ipsr, iip, vpd.vpsr, dcr + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 + // For DCR: all bits 0 + bsw.0 + ;; + mov r21 =r13 + adds r14=-VMM_PT_REGS_SIZE, r12 + ;; + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 + movl r10=0x8000000000000000 + adds r16=PT(CR_IIP), r14 + adds r20=PT(R1), r14 + ;; + rsm psr.ic | psr.i + ;; + srlz.i + ;; + mov ar.rsc = 0 + ;; + flushrs + ;; + mov ar.bspstore = 0 + // clear BSPSTORE + ;; + mov cr.ipsr=r6 + mov cr.ifs=r10 + ld8 r4 = [r16] // Set init iip for first run. + ld8 r1 = [r20] + ;; + mov cr.iip=r4 + adds r16=VMM_VPD_BASE_OFFSET,r13 + ;; + ld8 r18=[r16] + ;; + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] + mov r17=r0 + mov r22=r0 + mov r23=r0 + br.cond.sptk ia64_vmm_entry + br.ret.sptk b0 END(vmm_reset_entry) diff --git a/trunk/arch/ia64/kvm/vtlb.c b/trunk/arch/ia64/kvm/vtlb.c index 6b6307a3bd55..e22b93361e08 100644 --- a/trunk/arch/ia64/kvm/vtlb.c +++ b/trunk/arch/ia64/kvm/vtlb.c @@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) u64 i, dirty_pages = 1; u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); - void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; - + void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) + + KVM_MEM_DIRTY_LOG_OFS; dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; vmm_spin_lock(lock); diff --git a/trunk/arch/ia64/sn/kernel/irq.c b/trunk/arch/ia64/sn/kernel/irq.c index 66fd705e82c0..0c66dbdd1d72 100644 --- a/trunk/arch/ia64/sn/kernel/irq.c +++ b/trunk/arch/ia64/sn/kernel/irq.c @@ -227,14 +227,14 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, return new_irq_info; } -static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) +static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) { struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; nasid_t nasid; int slice; - nasid = cpuid_to_nasid(cpumask_first(mask)); - slice = cpuid_to_slice(cpumask_first(mask)); + nasid = cpuid_to_nasid(first_cpu(mask)); + slice = cpuid_to_slice(first_cpu(mask)); list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, sn_irq_lh[irq], list) diff --git a/trunk/arch/ia64/sn/kernel/msi_sn.c b/trunk/arch/ia64/sn/kernel/msi_sn.c index ca553b0429ce..83f190ffe350 100644 --- a/trunk/arch/ia64/sn/kernel/msi_sn.c +++ b/trunk/arch/ia64/sn/kernel/msi_sn.c @@ -151,8 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) } #ifdef CONFIG_SMP -static void sn_set_msi_irq_affinity(unsigned int irq, - const struct cpumask *cpu_mask) +static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) { struct msi_msg msg; int slice; @@ -165,7 +164,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, struct sn_pcibus_provider *provider; unsigned int cpu; - cpu = cpumask_first(cpu_mask); + cpu = first_cpu(cpu_mask); sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) return; @@ -205,7 +204,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = *cpu_mask; + irq_desc[irq].affinity = cpu_mask; } #endif /* CONFIG_SMP */ diff --git 
a/trunk/arch/m32r/Kconfig b/trunk/arch/m32r/Kconfig index cabba332cc48..29047d5c259a 100644 --- a/trunk/arch/m32r/Kconfig +++ b/trunk/arch/m32r/Kconfig @@ -10,7 +10,6 @@ config M32R default y select HAVE_IDE select HAVE_OPROFILE - select INIT_ALL_POSSIBLE config SBUS bool diff --git a/trunk/arch/m32r/kernel/smpboot.c b/trunk/arch/m32r/kernel/smpboot.c index 0f06b3722e96..39cb6da72dcb 100644 --- a/trunk/arch/m32r/kernel/smpboot.c +++ b/trunk/arch/m32r/kernel/smpboot.c @@ -73,11 +73,17 @@ static unsigned int bsp_phys_id = -1; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; +/* Bitmask of currently online CPUs */ +cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); + cpumask_t cpu_bootout_map; cpumask_t cpu_bootin_map; static cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); +cpumask_t cpu_possible_map = CPU_MASK_ALL; +EXPORT_SYMBOL(cpu_possible_map); /* Per CPU bogomips and other parameters */ struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned; diff --git a/trunk/arch/m68k/Kconfig b/trunk/arch/m68k/Kconfig index 836fb66f080d..c825bde17cb3 100644 --- a/trunk/arch/m68k/Kconfig +++ b/trunk/arch/m68k/Kconfig @@ -280,7 +280,6 @@ config M68060 config MMU_MOTOROLA bool - depends on MMU && !MMU_SUN3 config MMU_SUN3 bool diff --git a/trunk/arch/m68knommu/platform/coldfire/pit.c b/trunk/arch/m68knommu/platform/coldfire/pit.c index 2a12e7fa9748..c5b916700b22 100644 --- a/trunk/arch/m68knommu/platform/coldfire/pit.c +++ b/trunk/arch/m68knommu/platform/coldfire/pit.c @@ -156,7 +156,7 @@ void hw_timer_init(void) { u32 imr; - cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id()); + cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32); cf_pit_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFF, &cf_pit_clockevent); diff --git a/trunk/arch/mips/include/asm/irq.h b/trunk/arch/mips/include/asm/irq.h index abc62aa744ac..a58f0eecc68f 100644 --- a/trunk/arch/mips/include/asm/irq.h +++ b/trunk/arch/mips/include/asm/irq.h @@ -49,8 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq) #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF #include -extern void plat_set_irq_affinity(unsigned int irq, - const struct cpumask *affinity); +extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity); extern void smtc_forward_irq(unsigned int irq); /* diff --git a/trunk/arch/mips/include/asm/mach-ip27/topology.h b/trunk/arch/mips/include/asm/mach-ip27/topology.h index 1fb959f98982..7785bec732f2 100644 --- a/trunk/arch/mips/include/asm/mach-ip27/topology.h +++ b/trunk/arch/mips/include/asm/mach-ip27/topology.h @@ -37,6 +37,7 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; /* sched_domains SD_NODE_INIT for SGI IP27 machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/mips/include/asm/smp.h b/trunk/arch/mips/include/asm/smp.h index 86557b5d1b3f..0ff5b523ea77 100644 --- a/trunk/arch/mips/include/asm/smp.h +++ b/trunk/arch/mips/include/asm/smp.h @@ -38,6 +38,9 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */ #define SMP_CALL_FUNCTION 0x2 +extern cpumask_t phys_cpu_present_map; +#define cpu_possible_map phys_cpu_present_map + extern void asmlinkage smp_bootstrap(void); /* diff --git a/trunk/arch/mips/jazz/irq.c b/trunk/arch/mips/jazz/irq.c index 03965cb1b252..d7f8a782aae4 100644 --- 
a/trunk/arch/mips/jazz/irq.c
+++ b/trunk/arch/mips/jazz/irq.c
@@ -146,7 +146,7 @@ void __init plat_time_init(void)
 BUG_ON(HZ != 100);
- cd->cpumask = cpumask_of(cpu);
+ cd->cpumask = cpumask_of_cpu(cpu);
 clockevents_register_device(cd);
 action->dev_id = cd;
 setup_irq(JAZZ_TIMER_IRQ, action);
diff --git a/trunk/arch/mips/kernel/cevt-bcm1480.c b/trunk/arch/mips/kernel/cevt-bcm1480.c
index b820661678b0..0a57f86945f1 100644
--- a/trunk/arch/mips/kernel/cevt-bcm1480.c
+++ b/trunk/arch/mips/kernel/cevt-bcm1480.c
@@ -126,7 +126,7 @@ void __cpuinit sb1480_clockevent_init(void)
 cd->min_delta_ns = clockevent_delta2ns(2, cd);
 cd->rating = 200;
 cd->irq = irq;
- cd->cpumask = cpumask_of(cpu);
+ cd->cpumask = cpumask_of_cpu(cpu);
 cd->set_next_event = sibyte_next_event;
 cd->set_mode = sibyte_set_mode;
 clockevents_register_device(cd);
@@ -148,6 +148,6 @@ void __cpuinit sb1480_clockevent_init(void)
 action->name = name;
 action->dev_id = cd;
- irq_set_affinity(irq, cpumask_of(cpu));
+ irq_set_affinity(irq, cpumask_of_cpu(cpu));
 setup_irq(irq, action);
 }
diff --git a/trunk/arch/mips/kernel/cevt-ds1287.c b/trunk/arch/mips/kernel/cevt-ds1287.c
index 1ada45ea0700..df4acb68bfb5 100644
--- a/trunk/arch/mips/kernel/cevt-ds1287.c
+++ b/trunk/arch/mips/kernel/cevt-ds1287.c
@@ -88,6 +88,7 @@ static void ds1287_event_handler(struct clock_event_device *dev)
 static struct clock_event_device ds1287_clockevent = {
 .name = "ds1287",
 .features = CLOCK_EVT_FEAT_PERIODIC,
+ .cpumask = CPU_MASK_CPU0,
 .set_next_event = ds1287_set_next_event,
 .set_mode = ds1287_set_mode,
 .event_handler = ds1287_event_handler,
@@ -121,7 +122,6 @@ int __init ds1287_clockevent_init(int irq)
 clockevent_set_clock(cd, 32768);
 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
- cd->cpumask = cpumask_of(0);
 clockevents_register_device(&ds1287_clockevent);
diff --git a/trunk/arch/mips/kernel/cevt-gt641xx.c b/trunk/arch/mips/kernel/cevt-gt641xx.c
index e9b787feedcb..6e2f58520afb 100644
--- a/trunk/arch/mips/kernel/cevt-gt641xx.c
+++ b/trunk/arch/mips/kernel/cevt-gt641xx.c
@@ -96,6 +96,7 @@ static void gt641xx_timer0_event_handler(struct clock_event_device *dev)
 static struct clock_event_device gt641xx_timer0_clockevent = {
 .name = "gt641xx-timer0",
 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+ .cpumask = CPU_MASK_CPU0,
 .irq = GT641XX_TIMER0_IRQ,
 .set_next_event = gt641xx_timer0_set_next_event,
 .set_mode = gt641xx_timer0_set_mode,
@@ -131,7 +132,6 @@ static int __init gt641xx_timer0_clockevent_init(void)
 clockevent_set_clock(cd, gt641xx_base_clock);
 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
- cd->cpumask = cpumask_of(0);
 clockevents_register_device(&gt641xx_timer0_clockevent);
diff --git a/trunk/arch/mips/kernel/cevt-r4k.c b/trunk/arch/mips/kernel/cevt-r4k.c
index e1ec83b68031..4a4c59f2737a 100644
--- a/trunk/arch/mips/kernel/cevt-r4k.c
+++ b/trunk/arch/mips/kernel/cevt-r4k.c
@@ -195,7 +195,7 @@ int __cpuinit mips_clockevent_init(void)
 cd->rating = 300;
 cd->irq = irq;
- cd->cpumask = cpumask_of(cpu);
+ cd->cpumask = cpumask_of_cpu(cpu);
 cd->set_next_event = mips_next_event;
 cd->set_mode = mips_set_clock_mode;
 cd->event_handler = mips_event_handler;
diff --git a/trunk/arch/mips/kernel/cevt-sb1250.c b/trunk/arch/mips/kernel/cevt-sb1250.c
index a2eebaafda52..63ac3ad462bc 100644
--- a/trunk/arch/mips/kernel/cevt-sb1250.c
+++ b/trunk/arch/mips/kernel/cevt-sb1250.c
@@ -125,7 +125,7 @@ void __cpuinit sb1250_clockevent_init(void)
cd->min_delta_ns = clockevent_delta2ns(2, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); cd->set_next_event = sibyte_next_event; cd->set_mode = sibyte_set_mode; clockevents_register_device(cd); @@ -147,6 +147,6 @@ void __cpuinit sb1250_clockevent_init(void) action->name = name; action->dev_id = cd; - irq_set_affinity(irq, cpumask_of(cpu)); + irq_set_affinity(irq, cpumask_of_cpu(cpu)); setup_irq(irq, action); } diff --git a/trunk/arch/mips/kernel/cevt-smtc.c b/trunk/arch/mips/kernel/cevt-smtc.c index 6d45e24db5bf..5162fe4b5952 100644 --- a/trunk/arch/mips/kernel/cevt-smtc.c +++ b/trunk/arch/mips/kernel/cevt-smtc.c @@ -292,7 +292,7 @@ int __cpuinit mips_clockevent_init(void) cd->rating = 300; cd->irq = irq; - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); cd->set_next_event = mips_next_event; cd->set_mode = mips_set_clock_mode; cd->event_handler = mips_event_handler; diff --git a/trunk/arch/mips/kernel/cevt-txx9.c b/trunk/arch/mips/kernel/cevt-txx9.c index eccf7d6096bd..b5fc4eb412d2 100644 --- a/trunk/arch/mips/kernel/cevt-txx9.c +++ b/trunk/arch/mips/kernel/cevt-txx9.c @@ -112,6 +112,7 @@ static struct clock_event_device txx9tmr_clock_event_device = { .name = "TXx9", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .rating = 200, + .cpumask = CPU_MASK_CPU0, .set_mode = txx9tmr_set_mode, .set_next_event = txx9tmr_set_next_event, }; @@ -149,7 +150,6 @@ void __init txx9_clockevent_init(unsigned long baseaddr, int irq, clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd); cd->min_delta_ns = clockevent_delta2ns(0xf, cd); cd->irq = irq; - cd->cpumask = cpumask_of(0), clockevents_register_device(cd); setup_irq(irq, &txx9tmr_irq); printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n", diff --git a/trunk/arch/mips/kernel/i8253.c b/trunk/arch/mips/kernel/i8253.c index f4d187825f96..b6ac55162b9a 100644 --- a/trunk/arch/mips/kernel/i8253.c +++ b/trunk/arch/mips/kernel/i8253.c @@ -115,7 +115,7 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. 
*/ - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); clockevent_set_clock(cd, CLOCK_TICK_RATE); cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd); cd->min_delta_ns = clockevent_delta2ns(0xF, cd); diff --git a/trunk/arch/mips/kernel/irq-gic.c b/trunk/arch/mips/kernel/irq-gic.c index 494a49a317e9..f0a4bb19e096 100644 --- a/trunk/arch/mips/kernel/irq-gic.c +++ b/trunk/arch/mips/kernel/irq-gic.c @@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq) static DEFINE_SPINLOCK(gic_lock); -static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) { cpumask_t tmp = CPU_MASK_NONE; unsigned long flags; @@ -164,7 +164,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) pr_debug(KERN_DEBUG "%s called\n", __func__); irq -= _irqbase; - cpumask_and(&tmp, cpumask, cpu_online_mask); + cpus_and(tmp, cpumask, cpu_online_map); if (cpus_empty(tmp)) return; @@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); } - irq_desc[irq].affinity = *cpumask; + irq_desc[irq].affinity = cpumask; spin_unlock_irqrestore(&gic_lock, flags); } diff --git a/trunk/arch/mips/kernel/smp-cmp.c b/trunk/arch/mips/kernel/smp-cmp.c index f27beca4b26d..ca476c4f62a5 100644 --- a/trunk/arch/mips/kernel/smp-cmp.c +++ b/trunk/arch/mips/kernel/smp-cmp.c @@ -51,10 +51,10 @@ static int __init allowcpus(char *str) int len; cpus_clear(cpu_allow_map); - if (cpulist_parse(str, &cpu_allow_map) == 0) { + if (cpulist_parse(str, cpu_allow_map) == 0) { cpu_set(0, cpu_allow_map); cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map); - len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map); + len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map); buf[len] = '\0'; pr_debug("Allowable CPUs: %s\n", buf); return 1; @@ -226,7 +226,7 @@ void __init cmp_smp_setup(void) for (i = 1; i < NR_CPUS; i++) { if (amon_cpu_avail(i)) { - cpu_set(i, cpu_possible_map); + cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = ++ncpu; __cpu_logical_map[ncpu] = i; } diff --git a/trunk/arch/mips/kernel/smp-mt.c b/trunk/arch/mips/kernel/smp-mt.c index 6f7ee5ac46ee..87a1816c1f45 100644 --- a/trunk/arch/mips/kernel/smp-mt.c +++ b/trunk/arch/mips/kernel/smp-mt.c @@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0, write_vpe_c0_vpeconf0(tmp); /* Record this as available CPU */ - cpu_set(tc, cpu_possible_map); + cpu_set(tc, phys_cpu_present_map); __cpu_number_map[tc] = ++ncpu; __cpu_logical_map[ncpu] = tc; } diff --git a/trunk/arch/mips/kernel/smp.c b/trunk/arch/mips/kernel/smp.c index 3da94704f816..8bf88faf5afd 100644 --- a/trunk/arch/mips/kernel/smp.c +++ b/trunk/arch/mips/kernel/smp.c @@ -44,10 +44,15 @@ #include #endif /* CONFIG_MIPS_MT_SMTC */ +cpumask_t phys_cpu_present_map; /* Bitmask of available CPUs */ volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ +cpumask_t cpu_online_map; /* Bitmask of currently online CPUs */ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ +EXPORT_SYMBOL(phys_cpu_present_map); +EXPORT_SYMBOL(cpu_online_map); + extern void cpu_idle(void); /* Number of TCs (or siblings in Intel speak) per CPU core */ @@ -190,7 +195,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* preload SMP state for boot cpu */ void __devinit smp_prepare_boot_cpu(void) { - 
cpu_set(0, cpu_possible_map); + cpu_set(0, phys_cpu_present_map); cpu_set(0, cpu_online_map); cpu_set(0, cpu_callin_map); } diff --git a/trunk/arch/mips/kernel/smtc.c b/trunk/arch/mips/kernel/smtc.c index b6cca01ff82b..897fb2b4751c 100644 --- a/trunk/arch/mips/kernel/smtc.c +++ b/trunk/arch/mips/kernel/smtc.c @@ -290,7 +290,7 @@ static void smtc_configure_tlb(void) * possibly leave some TCs/VPEs as "slave" processors. * * Use c0_MVPConf0 to find out how many TCs are available, setting up - * cpu_possible_map and the logical/physical mappings. + * phys_cpu_present_map and the logical/physical mappings. */ int __init smtc_build_cpu_map(int start_cpu_slot) @@ -304,7 +304,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot) */ ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; for (i=start_cpu_slot; i> MVPCONF0_PTC_SHIFT) + 1)) { - cpu_clear(tc, cpu_possible_map); + cpu_clear(tc, phys_cpu_present_map); cpu_clear(tc, cpu_present_map); tc++; } diff --git a/trunk/arch/mips/mti-malta/malta-smtc.c b/trunk/arch/mips/mti-malta/malta-smtc.c index aabd7274507b..f84a46a8ae6e 100644 --- a/trunk/arch/mips/mti-malta/malta-smtc.c +++ b/trunk/arch/mips/mti-malta/malta-smtc.c @@ -114,9 +114,9 @@ struct plat_smp_ops msmtc_smp_ops = { */ -void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) +void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity) { - cpumask_t tmask = *affinity; + cpumask_t tmask = affinity; int cpu = 0; void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); @@ -139,7 +139,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) * be made to forward to an offline "CPU". */ - for_each_cpu(cpu, affinity) { + for_each_cpu_mask(cpu, affinity) { if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) cpu_clear(cpu, tmask); } diff --git a/trunk/arch/mips/nxp/pnx8550/common/time.c b/trunk/arch/mips/nxp/pnx8550/common/time.c index cf293b279098..62f495b57f93 100644 --- a/trunk/arch/mips/nxp/pnx8550/common/time.c +++ b/trunk/arch/mips/nxp/pnx8550/common/time.c @@ -102,7 +102,6 @@ __init void plat_time_init(void) unsigned int p; unsigned int pow2p; - pnx8xxx_clockevent.cpumask = cpu_none_mask; clockevents_register_device(&pnx8xxx_clockevent); clocksource_register(&pnx_clocksource); diff --git a/trunk/arch/mips/pmc-sierra/yosemite/smp.c b/trunk/arch/mips/pmc-sierra/yosemite/smp.c index f78c29b68d77..3a7df647ca77 100644 --- a/trunk/arch/mips/pmc-sierra/yosemite/smp.c +++ b/trunk/arch/mips/pmc-sierra/yosemite/smp.c @@ -141,7 +141,7 @@ static void __cpuinit yos_boot_secondary(int cpu, struct task_struct *idle) } /* - * Detect available CPUs, populate cpu_possible_map before smp_init + * Detect available CPUs, populate phys_cpu_present_map before smp_init * * We don't want to start the secondary CPU yet nor do we have a nice probing * feature in PMON so we just assume presence of the secondary core. 
@@ -150,10 +150,10 @@ static void __init yos_smp_setup(void) { int i; - cpus_clear(cpu_possible_map); + cpus_clear(phys_cpu_present_map); for (i = 0; i < 2; i++) { - cpu_set(i, cpu_possible_map); + cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = i; __cpu_logical_map[i] = i; } diff --git a/trunk/arch/mips/sgi-ip27/ip27-smp.c b/trunk/arch/mips/sgi-ip27/ip27-smp.c index 5b47d6b65275..ba5cdebeaf0d 100644 --- a/trunk/arch/mips/sgi-ip27/ip27-smp.c +++ b/trunk/arch/mips/sgi-ip27/ip27-smp.c @@ -76,7 +76,7 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest) /* Only let it join in if it's marked enabled */ if ((acpu->cpu_info.flags & KLINFO_ENABLE) && (tot_cpus_found != NR_CPUS)) { - cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, phys_cpu_present_map); alloc_cpupda(cpuid, tot_cpus_found); cpus_found++; tot_cpus_found++; diff --git a/trunk/arch/mips/sgi-ip27/ip27-timer.c b/trunk/arch/mips/sgi-ip27/ip27-timer.c index f024057a35f8..1327c2746fb7 100644 --- a/trunk/arch/mips/sgi-ip27/ip27-timer.c +++ b/trunk/arch/mips/sgi-ip27/ip27-timer.c @@ -134,7 +134,7 @@ void __cpuinit hub_rt_clock_event_init(void) cd->min_delta_ns = clockevent_delta2ns(0x300, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); cd->set_next_event = rt_next_event; cd->set_mode = rt_set_mode; clockevents_register_device(cd); diff --git a/trunk/arch/mips/sibyte/bcm1480/irq.c b/trunk/arch/mips/sibyte/bcm1480/irq.c index 12b465d404df..a35818ed4263 100644 --- a/trunk/arch/mips/sibyte/bcm1480/irq.c +++ b/trunk/arch/mips/sibyte/bcm1480/irq.c @@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq); static void disable_bcm1480_irq(unsigned int irq); static void ack_bcm1480_irq(unsigned int irq); #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); +static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask); #endif #ifdef CONFIG_PCI @@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) +static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask) { int i = 0, old_cpu, cpu, int_on, k; u64 cur_ints; @@ -117,11 +117,11 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) unsigned long flags; unsigned int irq_dirty; - if (cpumask_weight(mask) != 1) { + if (cpus_weight(mask) != 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); return; } - i = cpumask_first(mask); + i = first_cpu(mask); /* Convert logical CPU to physical CPU */ cpu = cpu_logical_map(i); diff --git a/trunk/arch/mips/sibyte/bcm1480/smp.c b/trunk/arch/mips/sibyte/bcm1480/smp.c index dddfda8e8294..bd9eeb43ed0e 100644 --- a/trunk/arch/mips/sibyte/bcm1480/smp.c +++ b/trunk/arch/mips/sibyte/bcm1480/smp.c @@ -136,7 +136,7 @@ static void __cpuinit bcm1480_boot_secondary(int cpu, struct task_struct *idle) /* * Use CFE to find out how many CPUs are available, setting up - * cpu_possible_map and the logical/physical mappings. + * phys_cpu_present_map and the logical/physical mappings. * XXXKW will the boot CPU ever not be physical 0? 
* * Common setup before any secondaries are started @@ -145,14 +145,14 @@ static void __init bcm1480_smp_setup(void) { int i, num; - cpus_clear(cpu_possible_map); - cpu_set(0, cpu_possible_map); + cpus_clear(phys_cpu_present_map); + cpu_set(0, phys_cpu_present_map); __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; for (i = 1, num = 0; i < NR_CPUS; i++) { if (cfe_cpu_stop(i) == 0) { - cpu_set(i, cpu_possible_map); + cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = ++num; __cpu_logical_map[num] = i; } diff --git a/trunk/arch/mips/sibyte/sb1250/irq.c b/trunk/arch/mips/sibyte/sb1250/irq.c index 808ac2959b8c..a5158483986e 100644 --- a/trunk/arch/mips/sibyte/sb1250/irq.c +++ b/trunk/arch/mips/sibyte/sb1250/irq.c @@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq); static void disable_sb1250_irq(unsigned int irq); static void ack_sb1250_irq(unsigned int irq); #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); +static void sb1250_set_affinity(unsigned int irq, cpumask_t mask); #endif #ifdef CONFIG_SIBYTE_HAS_LDT @@ -103,16 +103,16 @@ void sb1250_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) +static void sb1250_set_affinity(unsigned int irq, cpumask_t mask) { int i = 0, old_cpu, cpu, int_on; u64 cur_ints; struct irq_desc *desc = irq_desc + irq; unsigned long flags; - i = cpumask_first(mask); + i = first_cpu(mask); - if (cpumask_weight(mask) > 1) { + if (cpus_weight(mask) > 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); return; } diff --git a/trunk/arch/mips/sibyte/sb1250/smp.c b/trunk/arch/mips/sibyte/sb1250/smp.c index 5950a288a7da..0734b933e969 100644 --- a/trunk/arch/mips/sibyte/sb1250/smp.c +++ b/trunk/arch/mips/sibyte/sb1250/smp.c @@ -124,7 +124,7 @@ static void __cpuinit sb1250_boot_secondary(int cpu, struct task_struct *idle) /* * Use CFE to find out how many CPUs are available, setting up - * cpu_possible_map and the logical/physical mappings. + * phys_cpu_present_map and the logical/physical mappings. * XXXKW will the boot CPU ever not be physical 0? 
* * Common setup before any secondaries are started @@ -133,14 +133,14 @@ static void __init sb1250_smp_setup(void) { int i, num; - cpus_clear(cpu_possible_map); - cpu_set(0, cpu_possible_map); + cpus_clear(phys_cpu_present_map); + cpu_set(0, phys_cpu_present_map); __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; for (i = 1, num = 0; i < NR_CPUS; i++) { if (cfe_cpu_stop(i) == 0) { - cpu_set(i, cpu_possible_map); + cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = ++num; __cpu_logical_map[num] = i; } diff --git a/trunk/arch/mips/sni/time.c b/trunk/arch/mips/sni/time.c index 69f5f88711cc..796e3ce28720 100644 --- a/trunk/arch/mips/sni/time.c +++ b/trunk/arch/mips/sni/time.c @@ -80,7 +80,7 @@ static void __init sni_a20r_timer_setup(void) struct irqaction *action = &a20r_irqaction; unsigned int cpu = smp_processor_id(); - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); clockevents_register_device(cd); action->dev_id = cd; setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction); diff --git a/trunk/arch/parisc/Kconfig b/trunk/arch/parisc/Kconfig index aacf11d33723..644a70b1b04e 100644 --- a/trunk/arch/parisc/Kconfig +++ b/trunk/arch/parisc/Kconfig @@ -11,7 +11,6 @@ config PARISC select HAVE_OPROFILE select RTC_CLASS select RTC_DRV_PARISC - select INIT_ALL_POSSIBLE help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, diff --git a/trunk/arch/parisc/kernel/irq.c b/trunk/arch/parisc/kernel/irq.c index 4cea935e2f99..23ef950df008 100644 --- a/trunk/arch/parisc/kernel/irq.c +++ b/trunk/arch/parisc/kernel/irq.c @@ -131,12 +131,12 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest) return 0; } -static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) +static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest) { - if (cpu_check_affinity(irq, dest)) + if (cpu_check_affinity(irq, &dest)) return; - irq_desc[irq].affinity = *dest; + irq_desc[irq].affinity = dest; } #endif diff --git a/trunk/arch/parisc/kernel/smp.c b/trunk/arch/parisc/kernel/smp.c index 80bc000523fa..d47f3975c9c6 100644 --- a/trunk/arch/parisc/kernel/smp.c +++ b/trunk/arch/parisc/kernel/smp.c @@ -67,6 +67,21 @@ static volatile int cpu_now_booting __read_mostly = 0; /* track which CPU is boo static int parisc_max_cpus __read_mostly = 1; +/* online cpus are ones that we've managed to bring up completely + * possible cpus are all valid cpu + * present cpus are all detected cpu + * + * On startup we bring up the "possible" cpus. Since we discover + * CPUs later, we add them as hotplug, so the possible cpu mask is + * empty in the beginning. + */ + +cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; /* Bitmap of online CPUs */ +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; /* Bitmap of Present CPUs */ + +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); + DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED; enum ipi_message_type { diff --git a/trunk/arch/powerpc/include/asm/disassemble.h b/trunk/arch/powerpc/include/asm/disassemble.h deleted file mode 100644 index 9b198d1b3b2b..000000000000 --- a/trunk/arch/powerpc/include/asm/disassemble.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __ASM_PPC_DISASSEMBLE_H__ -#define __ASM_PPC_DISASSEMBLE_H__ - -#include - -static inline unsigned int get_op(u32 inst) -{ - return inst >> 26; -} - -static inline unsigned int get_xop(u32 inst) -{ - return (inst >> 1) & 0x3ff; -} - -static inline unsigned int get_sprn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_dcrn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_rt(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_rs(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_ra(u32 inst) -{ - return (inst >> 16) & 0x1f; -} - -static inline unsigned int get_rb(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_rc(u32 inst) -{ - return inst & 0x1; -} - -static inline unsigned int get_ws(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_d(u32 inst) -{ - return inst & 0xffff; -} - -#endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/trunk/arch/powerpc/include/asm/kvm_44x.h b/trunk/arch/powerpc/include/asm/kvm_44x.h deleted file mode 100644 index f49031b632ca..000000000000 --- a/trunk/arch/powerpc/include/asm/kvm_44x.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __ASM_44X_H__ -#define __ASM_44X_H__ - -#include - -#define PPC44x_TLB_SIZE 64 - -/* If the guest is expecting it, this can be as large as we like; we'd just - * need to find some way of advertising it. */ -#define KVM44x_GUEST_TLB_SIZE 64 - -struct kvmppc_44x_shadow_ref { - struct page *page; - u16 gtlb_index; - u8 writeable; - u8 tid; -}; - -struct kvmppc_vcpu_44x { - /* Unmodified copy of the guest's TLB. */ - struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; - - /* References to guest pages in the hardware TLB. */ - struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; - - /* State of the shadow TLB at guest context switch time. 
*/ - struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); - -#endif /* __ASM_44X_H__ */ diff --git a/trunk/arch/powerpc/include/asm/kvm_host.h b/trunk/arch/powerpc/include/asm/kvm_host.h index c1e436fe7738..34b52b7180cd 100644 --- a/trunk/arch/powerpc/include/asm/kvm_host.h +++ b/trunk/arch/powerpc/include/asm/kvm_host.h @@ -64,58 +64,27 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvmppc_44x_tlbe { +struct tlbe { u32 tid; /* Only the low 8 bits are used. */ u32 word0; u32 word1; u32 word2; }; -enum kvm_exit_types { - MMIO_EXITS, - DCR_EXITS, - SIGNAL_EXITS, - ITLB_REAL_MISS_EXITS, - ITLB_VIRT_MISS_EXITS, - DTLB_REAL_MISS_EXITS, - DTLB_VIRT_MISS_EXITS, - SYSCALL_EXITS, - ISI_EXITS, - DSI_EXITS, - EMULATED_INST_EXITS, - EMULATED_MTMSRWE_EXITS, - EMULATED_WRTEE_EXITS, - EMULATED_MTSPR_EXITS, - EMULATED_MFSPR_EXITS, - EMULATED_MTMSR_EXITS, - EMULATED_MFMSR_EXITS, - EMULATED_TLBSX_EXITS, - EMULATED_TLBWE_EXITS, - EMULATED_RFI_EXITS, - DEC_EXITS, - EXT_INTR_EXITS, - HALT_WAKEUP, - USR_PR_INST, - FP_UNAVAIL, - DEBUG_EXITS, - TIMEINGUEST, - __NUMBER_OF_KVM_EXIT_TYPES -}; - -/* allow access to big endian 32bit upper/lower parts and 64bit var */ -struct kvmppc_exit_timing { - union { - u64 tv64; - struct { - u32 tbu, tbl; - } tv32; - }; -}; - struct kvm_arch { }; struct kvm_vcpu_arch { + /* Unmodified copy of the guest's TLB. */ + struct tlbe guest_tlb[PPC44x_TLB_SIZE]; + /* TLB that's actually used when the guest is running. */ + struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; + /* Pages which are referenced in the shadow TLB. */ + struct page *shadow_pages[PPC44x_TLB_SIZE]; + + /* Track which TLB entries we've modified in the current exit. 
*/ + u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; + u32 host_stack; u32 host_pid; u32 host_dbcr0; @@ -125,32 +94,32 @@ struct kvm_vcpu_arch { u32 host_msr; u64 fpr[32]; - ulong gpr[32]; + u32 gpr[32]; - ulong pc; + u32 pc; u32 cr; - ulong ctr; - ulong lr; - ulong xer; + u32 ctr; + u32 lr; + u32 xer; - ulong msr; + u32 msr; u32 mmucr; - ulong sprg0; - ulong sprg1; - ulong sprg2; - ulong sprg3; - ulong sprg4; - ulong sprg5; - ulong sprg6; - ulong sprg7; - ulong srr0; - ulong srr1; - ulong csrr0; - ulong csrr1; - ulong dsrr0; - ulong dsrr1; - ulong dear; - ulong esr; + u32 sprg0; + u32 sprg1; + u32 sprg2; + u32 sprg3; + u32 sprg4; + u32 sprg5; + u32 sprg6; + u32 sprg7; + u32 srr0; + u32 srr1; + u32 csrr0; + u32 csrr1; + u32 dsrr0; + u32 dsrr1; + u32 dear; + u32 esr; u32 dec; u32 decar; u32 tbl; @@ -158,7 +127,7 @@ struct kvm_vcpu_arch { u32 tcr; u32 tsr; u32 ivor[16]; - ulong ivpr; + u32 ivpr; u32 pir; u32 shadow_pid; @@ -171,22 +140,9 @@ struct kvm_vcpu_arch { u32 dbcr0; u32 dbcr1; -#ifdef CONFIG_KVM_EXIT_TIMING - struct kvmppc_exit_timing timing_exit; - struct kvmppc_exit_timing timing_last_enter; - u32 last_exit_type; - u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_last_exit; - struct dentry *debugfs_exit_timing; -#endif - u32 last_inst; - ulong fault_dear; - ulong fault_esr; + u32 fault_dear; + u32 fault_esr; gpa_t paddr_accessed; u8 io_gpr; /* GPR used as IO source/target */ diff --git a/trunk/arch/powerpc/include/asm/kvm_ppc.h b/trunk/arch/powerpc/include/asm/kvm_ppc.h index 36d2a50a8487..bb62ad876de3 100644 --- a/trunk/arch/powerpc/include/asm/kvm_ppc.h +++ b/trunk/arch/powerpc/include/asm/kvm_ppc.h @@ -29,6 +29,11 @@ #include #include +struct kvm_tlb { + struct tlbe guest_tlb[PPC44x_TLB_SIZE]; + struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; +}; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ @@ -36,6 +41,9 @@ enum emulation_result { EMULATE_FAIL, /* can't emulate this instruction */ }; +extern const unsigned char exception_priority[]; +extern const unsigned char priority_exception[]; + extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern char kvmppc_handlers_start[]; extern unsigned long kvmppc_handler_len; @@ -50,44 +58,51 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - u64 asid, u32 flags, u32 max_bytes, - unsigned int gtlb_idx); +extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, + u64 asid, u32 flags); +extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, + gva_t eend, u32 asid); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); -/* Core-specific hooks */ - -extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, - unsigned int id); -extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); -extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); -extern int 
kvmppc_core_check_processor_compat(void); -extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr); - -extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); - -extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu); -extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu); - -extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); -extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); -extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); - -extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int op, int *advance); -extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); -extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); - -extern int kvmppc_booke_init(void); -extern void kvmppc_booke_exit(void); +/* XXX Book E specific */ +extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); + +extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); + +static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) +{ + unsigned int priority = exception_priority[exception]; + set_bit(priority, &vcpu->arch.pending_exceptions); +} + +static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) +{ + unsigned int priority = exception_priority[exception]; + clear_bit(priority, &vcpu->arch.pending_exceptions); +} + +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ +static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) +{ + if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); + + vcpu->arch.msr = new_msr; + + if (vcpu->arch.msr & MSR_WE) + kvm_vcpu_block(vcpu); +} + +static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) +{ + if (vcpu->arch.pid != new_pid) { + vcpu->arch.pid = new_pid; + vcpu->arch.swap_pid = 1; + } +} extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); diff --git a/trunk/arch/powerpc/include/asm/mmu-44x.h b/trunk/arch/powerpc/include/asm/mmu-44x.h index 27cc6fdcd3b7..8a97cfb08b7e 100644 --- a/trunk/arch/powerpc/include/asm/mmu-44x.h +++ b/trunk/arch/powerpc/include/asm/mmu-44x.h @@ -56,7 +56,6 @@ #ifndef __ASSEMBLY__ extern unsigned int tlb_44x_hwater; -extern unsigned int tlb_44x_index; typedef struct { unsigned int id; diff --git a/trunk/arch/powerpc/include/asm/topology.h b/trunk/arch/powerpc/include/asm/topology.h index 373fca394a54..c32da6f97999 100644 --- a/trunk/arch/powerpc/include/asm/topology.h +++ b/trunk/arch/powerpc/include/asm/topology.h @@ -48,6 +48,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/powerpc/kernel/asm-offsets.c b/trunk/arch/powerpc/kernel/asm-offsets.c index 9937fe44555f..661d07d2146b 100644 --- a/trunk/arch/powerpc/kernel/asm-offsets.c +++ b/trunk/arch/powerpc/kernel/asm-offsets.c @@ -23,6 +23,9 @@ #include #include #include +#ifdef CONFIG_KVM +#include +#endif #ifdef CONFIG_PPC64 #include #include @@ -48,9 +51,6 @@ #ifdef CONFIG_PPC_ISERIES #include #endif 
-#ifdef CONFIG_KVM -#include -#endif #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) #include "head_booke.h" @@ -357,10 +357,12 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); + DEFINE(TLBE_BYTES, sizeof(struct tlbe)); DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); + DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); + DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); @@ -383,16 +385,5 @@ int main(void) DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_KVM_EXIT_TIMING - DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, - arch.timing_exit.tv32.tbu)); - DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, - arch.timing_exit.tv32.tbl)); - DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, - arch.timing_last_enter.tv32.tbu)); - DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, - arch.timing_last_enter.tv32.tbl)); -#endif - return 0; } diff --git a/trunk/arch/powerpc/kernel/irq.c b/trunk/arch/powerpc/kernel/irq.c index 23b8b5e36f98..ac222d0ab12e 100644 --- a/trunk/arch/powerpc/kernel/irq.c +++ b/trunk/arch/powerpc/kernel/irq.c @@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map) mask = map; } if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, &mask); + irq_desc[irq].chip->set_affinity(irq, mask); else if (irq_desc[irq].action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/trunk/arch/powerpc/kernel/smp.c b/trunk/arch/powerpc/kernel/smp.c index 65484b2200b3..8ac3f721d235 100644 --- a/trunk/arch/powerpc/kernel/smp.c +++ b/trunk/arch/powerpc/kernel/smp.c @@ -59,9 +59,13 @@ struct thread_info *secondary_ti; +cpumask_t cpu_possible_map = CPU_MASK_NONE; +cpumask_t cpu_online_map = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); diff --git a/trunk/arch/powerpc/kernel/time.c b/trunk/arch/powerpc/kernel/time.c index 99f1ddd68582..e1f3a5140429 100644 --- a/trunk/arch/powerpc/kernel/time.c +++ b/trunk/arch/powerpc/kernel/time.c @@ -844,7 +844,7 @@ static void register_decrementer_clockevent(int cpu) struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; *dec = decrementer_clockevent; - dec->cpumask = cpumask_of(cpu); + dec->cpumask = cpumask_of_cpu(cpu); printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); diff --git a/trunk/arch/powerpc/kvm/44x.c b/trunk/arch/powerpc/kvm/44x.c deleted file mode 100644 index a66bec57265a..000000000000 --- a/trunk/arch/powerpc/kvm/44x.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include - -#include -#include -#include -#include -#include - -#include "44x_tlb.h" - -/* Note: clearing MSR[DE] just means that the debug interrupt will not be - * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. - * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt - * will be delivered as an "imprecise debug event" (which is indicated by - * DBSR[IDE]. - */ -static void kvm44x_disable_debug_interrupts(void) -{ - mtmsr(mfmsr() & ~MSR_DE); -} - -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ - kvm44x_disable_debug_interrupts(); - - mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); - mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); - mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); - mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); - mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); - mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); - mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); - mtmsr(vcpu->arch.host_msr); -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - u32 dbcr0 = 0; - - vcpu->arch.host_msr = mfmsr(); - kvm44x_disable_debug_interrupts(); - - /* Save host debug register state. */ - vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); - vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); - vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); - vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); - vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); - vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); - vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); - - /* set registers up for guest */ - - if (dbg->bp[0]) { - mtspr(SPRN_IAC1, dbg->bp[0]); - dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; - } - if (dbg->bp[1]) { - mtspr(SPRN_IAC2, dbg->bp[1]); - dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; - } - if (dbg->bp[2]) { - mtspr(SPRN_IAC3, dbg->bp[2]); - dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; - } - if (dbg->bp[3]) { - mtspr(SPRN_IAC4, dbg->bp[3]); - dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; - } - - mtspr(SPRN_DBCR0, dbcr0); - mtspr(SPRN_DBCR1, 0); - mtspr(SPRN_DBCR2, 0); -} - -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - kvmppc_44x_tlb_load(vcpu); -} - -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) -{ - kvmppc_44x_tlb_put(vcpu); -} - -int kvmppc_core_check_processor_compat(void) -{ - int r; - - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) - r = 0; - else - r = -ENOTSUPP; - - return r; -} - -int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; - int i; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. 
*/ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) - vcpu_44x->shadow_refs[i].gtlb_index = -1; - - return 0; -} - -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } - - gtlbe = &vcpu_44x->guest_tlb[index]; - - tr->physical_address = tlb_xlate(gtlbe, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; - - return 0; -} - -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) -{ - struct kvmppc_vcpu_44x *vcpu_44x; - struct kvm_vcpu *vcpu; - int err; - - vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_44x) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_44x->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - return vcpu; - -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -out: - return ERR_PTR(err); -} - -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -} - -static int kvmppc_44x_init(void) -{ - int r; - - r = kvmppc_booke_init(); - if (r) - return r; - - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); -} - -static void kvmppc_44x_exit(void) -{ - kvmppc_booke_exit(); -} - -module_init(kvmppc_44x_init); -module_exit(kvmppc_44x_exit); diff --git a/trunk/arch/powerpc/kvm/44x_emulate.c b/trunk/arch/powerpc/kvm/44x_emulate.c deleted file mode 100644 index 82489a743a6f..000000000000 --- a/trunk/arch/powerpc/kvm/44x_emulate.c +++ /dev/null @@ -1,371 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 
2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include -#include "timing.h" - -#include "booke.h" -#include "44x_tlb.h" - -#define OP_RFI 19 - -#define XOP_RFI 50 -#define XOP_MFMSR 83 -#define XOP_WRTEE 131 -#define XOP_MTMSR 146 -#define XOP_WRTEEI 163 -#define XOP_MFDCR 323 -#define XOP_MTDCR 451 -#define XOP_TLBSX 914 -#define XOP_ICCCI 966 -#define XOP_TLBWE 978 - -static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); -} - -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) -{ - int emulated = EMULATE_DONE; - int dcrn; - int ra; - int rb; - int rc; - int rs; - int rt; - int ws; - - switch (get_op(inst)) { - case OP_RFI: - switch (get_xop(inst)) { - case XOP_RFI: - kvmppc_emul_rfi(vcpu); - kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); - *advance = 0; - break; - - default: - emulated = EMULATE_FAIL; - break; - } - break; - - case 31: - switch (get_xop(inst)) { - - case XOP_MFMSR: - rt = get_rt(inst); - vcpu->arch.gpr[rt] = vcpu->arch.msr; - kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); - break; - - case XOP_MTMSR: - rs = get_rs(inst); - kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); - break; - - case XOP_WRTEE: - rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (vcpu->arch.gpr[rs] & MSR_EE); - kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); - break; - - case XOP_WRTEEI: - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (inst & MSR_EE); - kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); - break; - - case XOP_MFDCR: - dcrn = get_dcrn(inst); - rt = get_rt(inst); - - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. 
- */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); - local_irq_enable(); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = 0; - run->dcr.is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } - - break; - - case XOP_MTDCR: - dcrn = get_dcrn(inst); - rs = get_rs(inst); - - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = vcpu->arch.gpr[rs]; - run->dcr.is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } - - break; - - case XOP_TLBWE: - ra = get_ra(inst); - rs = get_rs(inst); - ws = get_ws(inst); - emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); - break; - - case XOP_TLBSX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - rc = get_rc(inst); - emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); - break; - - case XOP_ICCCI: - break; - - default: - emulated = EMULATE_FAIL; - } - - break; - - default: - emulated = EMULATE_FAIL; - } - - return emulated; -} - -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) -{ - switch (sprn) { - case SPRN_MMUCR: - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; - case SPRN_PID: - kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; - case SPRN_CCR0: - vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; - case SPRN_DEAR: - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; - case SPRN_ESR: - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR0: - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR1: - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; - case SPRN_TSR: - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; - case SPRN_TCR: - vcpu->arch.tcr = vcpu->arch.gpr[rs]; - kvmppc_emulate_dec(vcpu); - break; - - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. 
*/ - case SPRN_SPRG4: - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG5: - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG6: - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG7: - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; - - case SPRN_IVPR: - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR0: - vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR1: - vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR2: - vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR3: - vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR4: - vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR5: - vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR6: - vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR7: - vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR8: - vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR9: - vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR10: - vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR11: - vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR12: - vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR13: - vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR14: - vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR15: - vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; - break; - - default: - return EMULATE_FAIL; - } - - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); - return EMULATE_DONE; -} - -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) -{ - switch (sprn) { - /* 440 */ - case SPRN_MMUCR: - vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; - case SPRN_CCR0: - vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; - case SPRN_CCR1: - vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; - - /* Book E */ - case SPRN_PID: - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; - case SPRN_IVPR: - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; - case SPRN_DEAR: - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; - case SPRN_ESR: - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; - case SPRN_DBCR0: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; - case SPRN_DBCR1: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; - - case SPRN_IVOR0: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; - break; - case SPRN_IVOR1: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; - break; - case SPRN_IVOR2: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; - break; - case SPRN_IVOR3: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; - break; - case SPRN_IVOR4: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; - break; - case SPRN_IVOR5: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; - break; - case SPRN_IVOR6: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; - break; - case SPRN_IVOR7: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; - break; - case SPRN_IVOR8: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; - break; - case SPRN_IVOR9: - vcpu->arch.gpr[rt] = 
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; - break; - case SPRN_IVOR10: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; - break; - case SPRN_IVOR11: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; - break; - case SPRN_IVOR12: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; - break; - case SPRN_IVOR13: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; - break; - case SPRN_IVOR14: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; - break; - case SPRN_IVOR15: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; - break; - - default: - return EMULATE_FAIL; - } - - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); - return EMULATE_DONE; -} - diff --git a/trunk/arch/powerpc/kvm/44x_tlb.c b/trunk/arch/powerpc/kvm/44x_tlb.c index 9a34b8edb9e2..ad72c6f9811f 100644 --- a/trunk/arch/powerpc/kvm/44x_tlb.c +++ b/trunk/arch/powerpc/kvm/44x_tlb.c @@ -22,103 +22,20 @@ #include #include #include - -#include #include #include -#include -#include "timing.h" #include "44x_tlb.h" -#ifndef PPC44x_TLBE_SIZE -#define PPC44x_TLBE_SIZE PPC44x_TLB_4K -#endif - -#define PAGE_SIZE_4K (1<<12) -#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) - -#define PPC44x_TLB_UATTR_MASK \ - (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) -#ifdef DEBUG -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_44x_tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - tlbe = &vcpu_44x->guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} -#endif - -static inline void kvmppc_44x_tlbie(unsigned int index) -{ - /* 0 <= index < 64, so the V bit is clear and we can use the index as - * word0. */ - asm volatile( - "tlbwe %[index], %[index], 0\n" - : - : [index] "r"(index) - ); -} - -static inline void kvmppc_44x_tlbre(unsigned int index, - struct kvmppc_44x_tlbe *tlbe) -{ - asm volatile( - "tlbre %[word0], %[index], 0\n" - "mfspr %[tid], %[sprn_mmucr]\n" - "andi. %[tid], %[tid], 0xff\n" - "tlbre %[word1], %[index], 1\n" - "tlbre %[word2], %[index], 2\n" - : [word0] "=r"(tlbe->word0), - [word1] "=r"(tlbe->word1), - [word2] "=r"(tlbe->word2), - [tid] "=r"(tlbe->tid) - : [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - : "cc" - ); -} - -static inline void kvmppc_44x_tlbwe(unsigned int index, - struct kvmppc_44x_tlbe *stlbe) -{ - unsigned long tmp; - - asm volatile( - "mfspr %[tmp], %[sprn_mmucr]\n" - "rlwimi %[tmp], %[tid], 0, 0xff\n" - "mtspr %[sprn_mmucr], %[tmp]\n" - "tlbwe %[word0], %[index], 0\n" - "tlbwe %[word1], %[index], 1\n" - "tlbwe %[word2], %[index], 2\n" - : [tmp] "=&r"(tmp) - : [word0] "r"(stlbe->word0), - [word1] "r"(stlbe->word1), - [word2] "r"(stlbe->word2), - [tid] "r"(stlbe->tid), - [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - ); -} +static unsigned int kvmppc_tlb_44x_pos; static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) { - /* We only care about the guest's permission and user bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; + /* Mask off reserved bits. 
*/ + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; if (!usermode) { /* Guest is in supervisor mode, so we need to translate guest @@ -130,60 +47,18 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) /* Make sure host can always access this memory. */ attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; - /* WIMGE = 0b00100 */ - attrib |= PPC44x_TLB_M; - return attrib; } -/* Load shadow TLB back into hardware. */ -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbwe(i, stlbe); - } -} - -static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int i) -{ - vcpu_44x->shadow_tlb_mod[i] = 1; -} - -/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (vcpu_44x->shadow_tlb_mod[i]) - kvmppc_44x_tlbre(i, stlbe); - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbie(i); - } -} - - /* Search the guest TLB for a matching entry. */ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as) { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -208,89 +83,78 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, return -1; } -int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int index; - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + if (index == -1) + return NULL; + return &vcpu->arch.guest_tlb[index]; } -int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int index; - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + if (index == -1) + return NULL; + return &vcpu->arch.guest_tlb[index]; } -static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int stlb_index) +static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; - - if (!ref->page) - return; - - /* Discard from the TLB. */ - /* Note: we could actually invalidate a host mapping, if the host overwrote - * this TLB entry since we inserted a guest mapping. */ - kvmppc_44x_tlbie(stlb_index); - - /* Now release the page. */ - if (ref->writeable) - kvm_release_page_dirty(ref->page); - else - kvm_release_page_clean(ref->page); - - ref->page = NULL; + return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); +} - /* XXX set tlb_44x_index to stlb_index? 
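 * (Sketch of what that XXX proposes; note the allocator below does
 * "victim = ++tlb_44x_index", so the naive
 *
 *   tlb_44x_index = stlb_index;
 *
 * would hand out stlb_index + 1 next rather than the slot just freed.
 * Presumably that bookkeeping subtlety is why this is still an open
 * question.)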
*/ +static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, + unsigned int index) +{ + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; + struct page *page = vcpu->arch.shadow_pages[index]; - KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); + if (get_tlb_v(stlbe)) { + if (kvmppc_44x_tlbe_is_writable(stlbe)) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); + } } void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu_44x, i); + kvmppc_44x_shadow_release(vcpu, i); } -/** - * kvmppc_mmu_map -- create a host mapping for guest memory - * - * If the guest wanted a larger page than the host supports, only the first - * host page is mapped here and the rest are demand faulted. - * - * If the guest wanted a smaller page than the host page size, we map only the - * guest-size page (i.e. not a full host page mapping). - * - * Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. - */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, - u32 flags, u32 max_bytes, unsigned int gtlb_index) +void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i) +{ + vcpu->arch.shadow_tlb_mod[i] = 1; +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, + u32 flags) { - struct kvmppc_44x_tlbe stlbe; - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_shadow_ref *ref; struct page *new_page; + struct tlbe *stlbe; hpa_t hpaddr; - gfn_t gfn; unsigned int victim; - /* Select TLB entry to clobber. Indirectly guard against races with the TLB - * miss handler by disabling interrupts. */ - local_irq_disable(); - victim = ++tlb_44x_index; - if (victim > tlb_44x_hwater) - victim = 0; - tlb_44x_index = victim; - local_irq_enable(); + /* Future optimization: don't overwrite the TLB entry containing the + * current PC (or stack?). */ + victim = kvmppc_tlb_44x_pos++; + if (kvmppc_tlb_44x_pos > tlb_44x_hwater) + kvmppc_tlb_44x_pos = 0; + stlbe = &vcpu->arch.shadow_tlb[victim]; /* Get reference to new page. */ - gfn = gpaddr >> PAGE_SHIFT; new_page = gfn_to_page(vcpu->kvm, gfn); if (is_error_page(new_page)) { printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); @@ -299,8 +163,10 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, } hpaddr = page_to_phys(new_page); - /* Invalidate any previous shadow mappings. */ - kvmppc_44x_shadow_release(vcpu_44x, victim); + /* Drop reference to old page. */ + kvmppc_44x_shadow_release(vcpu, victim); + + vcpu->arch.shadow_pages[victim] = new_page; /* XXX Make sure (va, size) doesn't overlap any other * entries. 440x6 user manual says the result would be @@ -308,193 +174,78 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, /* XXX what about AS? */ - /* Force TS=1 for all guest mappings. */ - stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; - - if (max_bytes >= PAGE_SIZE) { - /* Guest mapping is larger than or equal to host page size. We can use - * a "native" host mapping. */ - stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; - } else { - /* Guest mapping is smaller than host page size. 
We must restrict the - * size of the mapping to be at most the smaller of the two, but for - * simplicity we fall back to a 4K mapping (this is probably what the - * guest is using anyways). */ - stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; - - /* 'hpaddr' is a host page, which is larger than the mapping we're - * inserting here. To compensate, we must add the in-page offset to the - * sub-page. */ - hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); - } + stlbe->tid = !(asid & 0xff); - stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, + /* Force TS=1 for all guest mappings. */ + /* For now we hardcode 4KB mappings, but it will be important to + * use host large pages in the future. */ + stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS + | PPC44x_TLB_4K; + stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); + stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, vcpu->arch.msr & MSR_PR); - stlbe.tid = !(asid & 0xff); - - /* Keep track of the reference so we can properly release it later. */ - ref = &vcpu_44x->shadow_refs[victim]; - ref->page = new_page; - ref->gtlb_index = gtlb_index; - ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); - ref->tid = stlbe.tid; - - /* Insert shadow mapping into hardware TLB. */ - kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); - kvmppc_44x_tlbwe(victim, &stlbe); - KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1, - stlbe.word2, handler); -} + kvmppc_tlbe_set_modified(vcpu, victim); -/* For a particular guest TLB entry, invalidate the corresponding host TLB - * mappings and release the host pages. */ -static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, - unsigned int gtlb_index) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - if (ref->gtlb_index == gtlb_index) - kvmppc_44x_shadow_release(vcpu_44x, i); - } + KVMTRACE_5D(STLB_WRITE, vcpu, victim, + stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, + handler); } -void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, + gva_t eend, u32 asid) { - vcpu->arch.shadow_pid = !usermode; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + unsigned int pid = !(asid & 0xff); int i; - if (unlikely(vcpu->arch.pid == new_pid)) - return; - - vcpu->arch.pid = new_pid; - - /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it - * can't access guest kernel mappings (TID=1). When we switch to a new - * guest PID, which will also use host PID=0, we must discard the old guest - * userspace mappings. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - - if (ref->tid == 0) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvmppc_44x_tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. 
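 * Non-RAM (MMIO) translations must never be pre-mapped in the shadow
 * TLB: the access has to keep faulting so the exit handler can steer
 * it to kvmppc_emulate_mmio(). The tlbwe emulation uses this helper
 * exactly that way:
 *
 *   if (tlbe_is_host_safe(vcpu, tlbe))
 *           kvmppc_mmu_map(...);   // RAM: safe to shadow eagerly
 *   // else: leave unmapped and let TLB-miss exits sort it out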
*/ - return 0; - - return 1; -} - -int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - unsigned int gtlb_index; - - gtlb_index = vcpu->arch.gpr[ra]; - if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { - printk("%s: index %d\n", __func__, gtlb_index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu_44x->guest_tlb[gtlb_index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ - if (tlbe->word0 & PPC44x_TLB_VALID) - kvmppc_44x_invalidate(vcpu, gtlb_index); - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = get_mmucr_stid(vcpu); - tlbe->word0 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = vcpu->arch.gpr[rs]; - break; - - default: - return EMULATE_FAIL; - } + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i <= tlb_44x_hwater; i++) { + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; + unsigned int tid; - if (tlbe_is_host_safe(vcpu, tlbe)) { - u64 asid; - gva_t eaddr; - gpa_t gpaddr; - u32 flags; - u32 bytes; + if (!get_tlb_v(stlbe)) + continue; - eaddr = get_tlb_eaddr(tlbe); - gpaddr = get_tlb_raddr(tlbe); + if (eend < get_tlb_eaddr(stlbe)) + continue; - /* Use the advertised page size to mask effective and real addrs. */ - bytes = get_tlb_bytes(tlbe); - eaddr &= ~(bytes - 1); - gpaddr &= ~(bytes - 1); + if (eaddr > get_tlb_end(stlbe)) + continue; - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - flags = tlbe->word2 & 0xffff; + tid = get_tlb_tid(stlbe); + if (tid && (tid != pid)) + continue; - kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index); + kvmppc_44x_shadow_release(vcpu, i); + stlbe->word0 = 0; + kvmppc_tlbe_set_modified(vcpu, i); + KVMTRACE_5D(STLB_INVAL, vcpu, i, + stlbe->tid, stlbe->word0, stlbe->word1, + stlbe->word2, handler); } - - KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, - tlbe->word1, tlbe->word2, handler); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); - return EMULATE_DONE; } -int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) +/* Invalidate all mappings on the privilege switch after PID has been changed. + * The guest always runs with PID=1, so we must clear the entire TLB when + * switching address spaces. */ +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) { - u32 ea; - int gtlb_index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - ea = vcpu->arch.gpr[rb]; - if (ra) - ea += vcpu->arch.gpr[ra]; - - gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - if (gtlb_index < 0) - vcpu->arch.cr &= ~0x20000000; - else - vcpu->arch.cr |= 0x20000000; + int i; + + if (vcpu->arch.swap_pid) { + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i <= tlb_44x_hwater; i++) { + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; + + /* Future optimization: clear only userspace mappings. 
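 * kvmppc_mmu_map assigns shadow TIDs as tid = !(asid & 0xff), so guest
 * kernel translations (guest TID 0, global) become shadow TID 1 while
 * guest user translations (nonzero guest PID) become shadow TID 0.
 * For example:
 *
 *   asid low byte 0x05 (user mapping, guest PID 5)  ->  shadow tid 0
 *   asid low byte 0x00 (guest kernel mapping)       ->  shadow tid 1
 *
 * A userspace-only flush would therefore release just the TID 0
 * entries instead of walking every entry as the loop below does.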
*/ + kvmppc_44x_shadow_release(vcpu, i); + stlbe->word0 = 0; + kvmppc_tlbe_set_modified(vcpu, i); + KVMTRACE_5D(STLB_INVAL, vcpu, i, + stlbe->tid, stlbe->word0, stlbe->word1, + stlbe->word2, handler); + } + vcpu->arch.swap_pid = 0; } - vcpu->arch.gpr[rt] = gtlb_index; - kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); - return EMULATE_DONE; + vcpu->arch.shadow_pid = !usermode; } diff --git a/trunk/arch/powerpc/kvm/44x_tlb.h b/trunk/arch/powerpc/kvm/44x_tlb.h index 772191f29e62..2ccd46b6f6b7 100644 --- a/trunk/arch/powerpc/kvm/44x_tlb.h +++ b/trunk/arch/powerpc/kvm/44x_tlb.h @@ -25,52 +25,48 @@ extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as); -extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); -extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); - -extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, - u8 rc); -extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); +extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); +extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); /* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) +static inline unsigned int get_tlb_size(const struct tlbe *tlbe) { return (tlbe->word0 >> 4) & 0xf; } -static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) +static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) { return tlbe->word0 & 0xfffffc00; } -static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) +static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) { unsigned int pgsize = get_tlb_size(tlbe); return 1 << 10 << (pgsize << 1); } -static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) +static inline gva_t get_tlb_end(const struct tlbe *tlbe) { return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; } -static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) +static inline u64 get_tlb_raddr(const struct tlbe *tlbe) { u64 word1 = tlbe->word1; return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); } -static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) +static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) { return tlbe->tid & 0xff; } -static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) +static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) { return (tlbe->word0 >> 8) & 0x1; } -static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) +static inline unsigned int get_tlb_v(const struct tlbe *tlbe) { return (tlbe->word0 >> 9) & 0x1; } @@ -85,7 +81,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) return (vcpu->arch.mmucr >> 16) & 0x1; } -static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr) +static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) { unsigned int pgmask = get_tlb_bytes(tlbe) - 1; diff --git a/trunk/arch/powerpc/kvm/Kconfig b/trunk/arch/powerpc/kvm/Kconfig index 6dbdc4817d80..53aaa66b25e5 100644 --- a/trunk/arch/powerpc/kvm/Kconfig +++ b/trunk/arch/powerpc/kvm/Kconfig @@ -15,33 +15,27 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION config KVM - bool + bool "Kernel-based Virtual Machine (KVM) support" + depends on 44x && EXPERIMENTAL select PREEMPT_NOTIFIERS select ANON_INODES - -config KVM_440 - bool "KVM support for PowerPC 440 processors" - depends on EXPERIMENTAL && 44x - select KVM + # We can only run on Book E hosts so far + 
select KVM_BOOKE_HOST ---help--- - Support running unmodified 440 guest kernels in virtual machines on - 440 host processors. + Support hosting virtualized guest machines. You will also + need to select one or more of the processor modules below. This module provides access to the hardware capabilities through a character device node named /dev/kvm. If unsure, say N. -config KVM_EXIT_TIMING - bool "Detailed exit timing" - depends on KVM +config KVM_BOOKE_HOST + bool "KVM host support for Book E PowerPC processors" + depends on KVM && 44x ---help--- - Calculate elapsed time for every exit/enter cycle. A per-vcpu - report is available in debugfs kvm/vm#_vcpu#_timing. - The overhead is relatively small, however it is not recommended for - production environments. - - If unsure, say N. + Provides host support for KVM on Book E PowerPC processors. Currently + this works on 440 processors only. config KVM_TRACE bool "KVM trace support" diff --git a/trunk/arch/powerpc/kvm/Makefile b/trunk/arch/powerpc/kvm/Makefile index df7ba59e6d53..2a5d4397ac4b 100644 --- a/trunk/arch/powerpc/kvm/Makefile +++ b/trunk/arch/powerpc/kvm/Makefile @@ -8,16 +8,10 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) -kvm-objs := $(common-objs-y) powerpc.o emulate.o -obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o +kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o obj-$(CONFIG_KVM) += kvm.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-440-objs := \ - booke.o \ - booke_interrupts.o \ - 44x.o \ - 44x_tlb.o \ - 44x_emulate.o -obj-$(CONFIG_KVM_440) += kvm-440.o +kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o +obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o diff --git a/trunk/arch/powerpc/kvm/booke.h b/trunk/arch/powerpc/kvm/booke.h deleted file mode 100644 index cf7c94ca24bf..000000000000 --- a/trunk/arch/powerpc/kvm/booke.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __KVM_BOOKE_H__ -#define __KVM_BOOKE_H__ - -#include -#include -#include "timing.h" - -/* interrupt priority ordering */ -#define BOOKE_IRQPRIO_DATA_STORAGE 0 -#define BOOKE_IRQPRIO_INST_STORAGE 1 -#define BOOKE_IRQPRIO_ALIGNMENT 2 -#define BOOKE_IRQPRIO_PROGRAM 3 -#define BOOKE_IRQPRIO_FP_UNAVAIL 4 -#define BOOKE_IRQPRIO_SYSCALL 5 -#define BOOKE_IRQPRIO_AP_UNAVAIL 6 -#define BOOKE_IRQPRIO_DTLB_MISS 7 -#define BOOKE_IRQPRIO_ITLB_MISS 8 -#define BOOKE_IRQPRIO_MACHINE_CHECK 9 -#define BOOKE_IRQPRIO_DEBUG 10 -#define BOOKE_IRQPRIO_CRITICAL 11 -#define BOOKE_IRQPRIO_WATCHDOG 12 -#define BOOKE_IRQPRIO_EXTERNAL 13 -#define BOOKE_IRQPRIO_FIT 14 -#define BOOKE_IRQPRIO_DECREMENTER 15 - -/* Helper function for "full" MSR writes. No need to call this if only EE is - * changing. 
*/ -static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) -{ - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) - kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - - vcpu->arch.msr = new_msr; - - if (vcpu->arch.msr & MSR_WE) { - kvm_vcpu_block(vcpu); - kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); - }; -} - -#endif /* __KVM_BOOKE_H__ */ diff --git a/trunk/arch/powerpc/kvm/booke.c b/trunk/arch/powerpc/kvm/booke_guest.c similarity index 56% rename from trunk/arch/powerpc/kvm/booke.c rename to trunk/arch/powerpc/kvm/booke_guest.c index 35485dd6927e..7b2591e26bae 100644 --- a/trunk/arch/powerpc/kvm/booke.c +++ b/trunk/arch/powerpc/kvm/booke_guest.c @@ -24,26 +24,21 @@ #include #include #include - #include #include #include -#include "timing.h" -#include -#include -#include "booke.h" #include "44x_tlb.h" -unsigned long kvmppc_booke_handlers; - #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_stats_debugfs_item debugfs_entries[] = { + { "exits", VCPU_STAT(sum_exits) }, { "mmio", VCPU_STAT(mmio_exits) }, { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, + { "light", VCPU_STAT(light_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, @@ -58,19 +53,103 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +static const u32 interrupt_msr_mask[16] = { + [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, + [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, + [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, + [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_DEBUG] = MSR_ME, +}; + +const unsigned char exception_priority[] = { + [BOOKE_INTERRUPT_DATA_STORAGE] = 0, + [BOOKE_INTERRUPT_INST_STORAGE] = 1, + [BOOKE_INTERRUPT_ALIGNMENT] = 2, + [BOOKE_INTERRUPT_PROGRAM] = 3, + [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, + [BOOKE_INTERRUPT_SYSCALL] = 5, + [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, + [BOOKE_INTERRUPT_DTLB_MISS] = 7, + [BOOKE_INTERRUPT_ITLB_MISS] = 8, + [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, + [BOOKE_INTERRUPT_DEBUG] = 10, + [BOOKE_INTERRUPT_CRITICAL] = 11, + [BOOKE_INTERRUPT_WATCHDOG] = 12, + [BOOKE_INTERRUPT_EXTERNAL] = 13, + [BOOKE_INTERRUPT_FIT] = 14, + [BOOKE_INTERRUPT_DECREMENTER] = 15, +}; + +const unsigned char priority_exception[] = { + BOOKE_INTERRUPT_DATA_STORAGE, + BOOKE_INTERRUPT_INST_STORAGE, + BOOKE_INTERRUPT_ALIGNMENT, + BOOKE_INTERRUPT_PROGRAM, + BOOKE_INTERRUPT_FP_UNAVAIL, + BOOKE_INTERRUPT_SYSCALL, + BOOKE_INTERRUPT_AP_UNAVAIL, + BOOKE_INTERRUPT_DTLB_MISS, + BOOKE_INTERRUPT_ITLB_MISS, + BOOKE_INTERRUPT_MACHINE_CHECK, + BOOKE_INTERRUPT_DEBUG, + BOOKE_INTERRUPT_CRITICAL, + BOOKE_INTERRUPT_WATCHDOG, + BOOKE_INTERRUPT_EXTERNAL, + BOOKE_INTERRUPT_FIT, + BOOKE_INTERRUPT_DECREMENTER, +}; + + +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct tlbe *tlbe; + int i; + + printk("vcpu %d TLB 
dump:\n", vcpu->vcpu_id); + printk("| %2s | %3s | %8s | %8s | %8s |\n", + "nr", "tid", "word0", "word1", "word2"); + + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + tlbe = &vcpu->arch.guest_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" G%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } + + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + tlbe = &vcpu->arch.shadow_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" S%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } +} + /* TODO: use vcpu_printf() */ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); - printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); - printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); + printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); + printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); for (i = 0; i < 32; i += 4) { - printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, + printk("gpr%02d: %08x %08x %08x %08x\n", i, vcpu->arch.gpr[i], vcpu->arch.gpr[i+1], vcpu->arch.gpr[i+2], @@ -78,96 +157,69 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) } } -static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, - unsigned int priority) +/* Check if we are ready to deliver the interrupt */ +static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) { - set_bit(priority, &vcpu->arch.pending_exceptions); -} + int r; -void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) -{ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); -} - -void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) -{ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); -} - -int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) -{ - return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); -} - -void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) -{ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); -} - -/* Deliver the interrupt of the corresponding priority, if possible. 
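 * Delivery mimics real Book E exception entry. With the reset-time
 * IVPR of 0x55550000 and a hypothetical IVOR4 of 0x500, delivering an
 * external interrupt amounts to:
 *
 *   srr0 = pc;                         // return address
 *   srr1 = msr;                        // saved context
 *   pc   = 0x55550000 | 0x500;         // vector = IVPR | IVOR4
 *   msr &= MSR_CE | MSR_ME | MSR_DE;   // EE cleared, critical allowed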
*/ -static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, - unsigned int priority) -{ - int allowed = 0; - ulong msr_mask; - - switch (priority) { - case BOOKE_IRQPRIO_PROGRAM: - case BOOKE_IRQPRIO_DTLB_MISS: - case BOOKE_IRQPRIO_ITLB_MISS: - case BOOKE_IRQPRIO_SYSCALL: - case BOOKE_IRQPRIO_DATA_STORAGE: - case BOOKE_IRQPRIO_INST_STORAGE: - case BOOKE_IRQPRIO_FP_UNAVAIL: - case BOOKE_IRQPRIO_AP_UNAVAIL: - case BOOKE_IRQPRIO_ALIGNMENT: - allowed = 1; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + switch (interrupt) { + case BOOKE_INTERRUPT_CRITICAL: + r = vcpu->arch.msr & MSR_CE; break; - case BOOKE_IRQPRIO_CRITICAL: - case BOOKE_IRQPRIO_WATCHDOG: - allowed = vcpu->arch.msr & MSR_CE; - msr_mask = MSR_ME; + case BOOKE_INTERRUPT_MACHINE_CHECK: + r = vcpu->arch.msr & MSR_ME; break; - case BOOKE_IRQPRIO_MACHINE_CHECK: - allowed = vcpu->arch.msr & MSR_ME; - msr_mask = 0; + case BOOKE_INTERRUPT_EXTERNAL: + r = vcpu->arch.msr & MSR_EE; break; - case BOOKE_IRQPRIO_EXTERNAL: - case BOOKE_IRQPRIO_DECREMENTER: - case BOOKE_IRQPRIO_FIT: - allowed = vcpu->arch.msr & MSR_EE; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + case BOOKE_INTERRUPT_DECREMENTER: + r = vcpu->arch.msr & MSR_EE; break; - case BOOKE_IRQPRIO_DEBUG: - allowed = vcpu->arch.msr & MSR_DE; - msr_mask = MSR_ME; + case BOOKE_INTERRUPT_FIT: + r = vcpu->arch.msr & MSR_EE; break; + case BOOKE_INTERRUPT_WATCHDOG: + r = vcpu->arch.msr & MSR_CE; + break; + case BOOKE_INTERRUPT_DEBUG: + r = vcpu->arch.msr & MSR_DE; + break; + default: + r = 1; } - if (allowed) { - vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.msr; - vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; - kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); + return r; +} - clear_bit(priority, &vcpu->arch.pending_exceptions); +static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +{ + switch (interrupt) { + case BOOKE_INTERRUPT_DECREMENTER: + vcpu->arch.tsr |= TSR_DIS; + break; } - return allowed; + vcpu->arch.srr0 = vcpu->arch.pc; + vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; + kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); } /* Check pending exceptions and deliver one, if possible. 
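 * pending_exceptions is indexed by priority, so the lowest set bit is
 * the most urgent exception. For example, with bit 7 (DTLB miss) and
 * bit 13 (external interrupt) pending while the guest has MSR[EE] = 0:
 *
 *   find_first_bit(pending, ...) -> 7   // DTLB miss: always allowed
 *   // bit 7 is cleared on delivery; bit 13 stays latched until the
 *   // guest sets MSR[EE] and a later delivery pass picks it up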
*/ -void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned int exception; unsigned int priority; - priority = __ffs(*pending); + priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); while (priority <= BOOKE_MAX_INTERRUPT) { - if (kvmppc_booke_irqprio_deliver(vcpu, priority)) + exception = priority_exception[priority]; + if (kvmppc_can_deliver_interrupt(vcpu, exception)) { + kvmppc_clear_exception(vcpu, exception); + kvmppc_deliver_interrupt(vcpu, exception); break; + } priority = find_next_bit(pending, BITS_PER_BYTE * sizeof(*pending), @@ -186,9 +238,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, enum emulation_result er; int r = RESUME_HOST; - /* update before a new last_exit_type is rewritten */ - kvmppc_update_timing_stats(vcpu); - local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -202,19 +251,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_EXTERNAL: - kvmppc_account_exit(vcpu, EXT_INTR_EXITS); - if (need_resched()) - cond_resched(); - r = RESUME_GUEST; - break; - case BOOKE_INTERRUPT_DECREMENTER: /* Since we switched IVPR back to the host's value, the host * handled this interrupt the moment we enabled interrupts. * Now we just offer it a chance to reschedule the guest. */ - kvmppc_account_exit(vcpu, DEC_EXITS); + + /* XXX At this point the TLB still holds our shadow TLB, so if + * we do reschedule the host will fault over it. Perhaps we + * should politely restore the host's entries to minimize + * misses before ceding control. */ if (need_resched()) cond_resched(); + if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) + vcpu->stat.dec_exits++; + else + vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -223,19 +274,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Program traps generated by user-level software must be handled * by the guest kernel. */ vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); r = RESUME_GUEST; - kvmppc_account_exit(vcpu, USR_PR_INST); break; } er = kvmppc_emulate_instruction(run, vcpu); switch (er) { case EMULATE_DONE: - /* don't overwrite subtypes, just account kvm_stats */ - kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); /* Future optimization: only reload non-volatiles if * they were actually modified by emulation. */ + vcpu->stat.emulated_inst_exits++; r = RESUME_GUEST_NV; break; case EMULATE_DO_DCR: @@ -244,7 +293,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case EMULATE_FAIL: /* XXX Deliver Program interrupt to guest. */ - printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", + printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. 
*/ @@ -258,53 +307,48 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_FP_UNAVAIL: - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); - kvmppc_account_exit(vcpu, FP_UNAVAIL); + kvmppc_queue_exception(vcpu, exit_nr); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DATA_STORAGE: vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); - kvmppc_account_exit(vcpu, DSI_EXITS); + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.dsi_exits++; r = RESUME_GUEST; break; case BOOKE_INTERRUPT_INST_STORAGE: vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); - kvmppc_account_exit(vcpu, ISI_EXITS); + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.isi_exits++; r = RESUME_GUEST; break; case BOOKE_INTERRUPT_SYSCALL: - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); - kvmppc_account_exit(vcpu, SYSCALL_EXITS); + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.syscall_exits++; r = RESUME_GUEST; break; - /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_DTLB_MISS: { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; + struct tlbe *gtlbe; unsigned long eaddr = vcpu->arch.fault_dear; - int gtlb_index; gfn_t gfn; /* Check the guest TLB. */ - gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); - if (gtlb_index < 0) { + gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); + if (!gtlbe) { /* The guest didn't have a mapping for it. */ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); + kvmppc_queue_exception(vcpu, exit_nr); vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); + vcpu->stat.dtlb_real_miss_exits++; r = RESUME_GUEST; break; } - gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; @@ -315,45 +359,38 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, - gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); - kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); + kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, + gtlbe->word2); + vcpu->stat.dtlb_virt_miss_exits++; r = RESUME_GUEST; } else { /* Guest has mapped and accessed a page which is not * actually RAM. */ r = kvmppc_emulate_mmio(run, vcpu); - kvmppc_account_exit(vcpu, MMIO_EXITS); } break; } - /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_ITLB_MISS: { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; + struct tlbe *gtlbe; unsigned long eaddr = vcpu->arch.pc; - gpa_t gpaddr; gfn_t gfn; - int gtlb_index; r = RESUME_GUEST; /* Check the guest TLB. */ - gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); - if (gtlb_index < 0) { + gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); + if (!gtlbe) { /* The guest didn't have a mapping for it. 
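 * An ITLB miss exit resolves in one of three ways, mirroring the
 * branches below:
 *
 *   if (!gtlbe)                         // guest TLB miss
 *           reflect ITLB_MISS to the guest;
 *   else if (kvm_is_visible_gfn(...))   // guest hit, shadow miss, RAM
 *           kvmppc_mmu_map(...);        // refill shadow TLB, resume
 *   else                                // guest hit, but not RAM
 *           queue a machine check;      // can't fetch from MMIO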
*/ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); - kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.itlb_real_miss_exits++; break; } - kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); + vcpu->stat.itlb_virt_miss_exits++; - gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - gpaddr = tlb_xlate(gtlbe, eaddr); - gfn = gpaddr >> PAGE_SHIFT; + gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { /* The guest TLB had a mapping, but the shadow TLB @@ -362,11 +399,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, - gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, + gtlbe->word2); } else { /* Guest mapped and leaped at non-RAM! */ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); + kvmppc_queue_exception(vcpu, + BOOKE_INTERRUPT_MACHINE_CHECK); } break; @@ -383,7 +421,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, mtspr(SPRN_DBSR, dbsr); run->exit_reason = KVM_EXIT_DEBUG; - kvmppc_account_exit(vcpu, DEBUG_EXITS); r = RESUME_HOST; break; } @@ -395,8 +432,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_disable(); - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_check_and_deliver_interrupts(vcpu); + /* Do some exit accounting. */ + vcpu->stat.sum_exits++; if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -404,7 +443,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (signal_pending(current)) { run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - kvmppc_account_exit(vcpu, SIGNAL_EXITS); + + vcpu->stat.signal_exits++; + } else { + vcpu->stat.light_exits++; + } + } else { + switch (run->exit_reason) { + case KVM_EXIT_MMIO: + vcpu->stat.mmio_exits++; + break; + case KVM_EXIT_DCR: + vcpu->stat.dcr_exits++; + break; + case KVM_EXIT_INTR: + vcpu->stat.signal_exits++; + break; } } @@ -414,6 +468,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; + + tlbe->tid = 0; + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; + tlbe->word1 = 0; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; + + tlbe++; + tlbe->tid = 0; + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; + tlbe->word1 = 0xef600000; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR + | PPC44x_TLB_I | PPC44x_TLB_G; + vcpu->arch.pc = 0; vcpu->arch.msr = 0; vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ @@ -424,9 +492,12 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) * before it's programmed its own IVPR. */ vcpu->arch.ivpr = 0x55550000; - kvmppc_init_timing_stats(vcpu); + /* Since the guest can directly access the timebase, it must know the + * real timebase frequency. Accordingly, it must see the state of + * CCR1[TCS]. 
*/ + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - return kvmppc_core_vcpu_setup(vcpu); + return 0; } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -465,7 +536,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.ctr = regs->ctr; vcpu->arch.lr = regs->lr; vcpu->arch.xer = regs->xer; - kvmppc_set_msr(vcpu, regs->msr); + vcpu->arch.msr = regs->msr; vcpu->arch.srr0 = regs->srr0; vcpu->arch.srr1 = regs->srr1; vcpu->arch.sprg0 = regs->sprg0; @@ -504,62 +575,31 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { - return kvmppc_core_vcpu_translate(vcpu, tr); -} + struct tlbe *gtlbe; + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); + if (index == -1) { + tr->valid = 0; + return 0; + } -int kvmppc_booke_init(void) -{ - unsigned long ivor[16]; - unsigned long max_ivor = 0; - int i; + gtlbe = &vcpu->arch.guest_tlb[index]; - /* We install our own exception handlers by hijacking IVPR. IVPR must - * be 16-bit aligned, so we need a 64KB allocation. */ - kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, - VCPU_SIZE_ORDER); - if (!kvmppc_booke_handlers) - return -ENOMEM; - - /* XXX make sure our handlers are smaller than Linux's */ - - /* Copy our interrupt handlers to match host IVORs. That way we don't - * have to swap the IVORs on every guest/host transition. */ - ivor[0] = mfspr(SPRN_IVOR0); - ivor[1] = mfspr(SPRN_IVOR1); - ivor[2] = mfspr(SPRN_IVOR2); - ivor[3] = mfspr(SPRN_IVOR3); - ivor[4] = mfspr(SPRN_IVOR4); - ivor[5] = mfspr(SPRN_IVOR5); - ivor[6] = mfspr(SPRN_IVOR6); - ivor[7] = mfspr(SPRN_IVOR7); - ivor[8] = mfspr(SPRN_IVOR8); - ivor[9] = mfspr(SPRN_IVOR9); - ivor[10] = mfspr(SPRN_IVOR10); - ivor[11] = mfspr(SPRN_IVOR11); - ivor[12] = mfspr(SPRN_IVOR12); - ivor[13] = mfspr(SPRN_IVOR13); - ivor[14] = mfspr(SPRN_IVOR14); - ivor[15] = mfspr(SPRN_IVOR15); - - for (i = 0; i < 16; i++) { - if (ivor[i] > max_ivor) - max_ivor = ivor[i]; - - memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + i * kvmppc_handler_len, - kvmppc_handler_len); - } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + tr->physical_address = tlb_xlate(gtlbe, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; return 0; } - -void __exit kvmppc_booke_exit(void) -{ - free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); - kvm_exit(); -} diff --git a/trunk/arch/powerpc/kvm/booke_host.c b/trunk/arch/powerpc/kvm/booke_host.c new file mode 100644 index 000000000000..b480341bc31e --- /dev/null +++ b/trunk/arch/powerpc/kvm/booke_host.c @@ -0,0 +1,83 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include + +unsigned long kvmppc_booke_handlers; + +static int kvmppc_booke_init(void) +{ + unsigned long ivor[16]; + unsigned long max_ivor = 0; + int i; + + /* We install our own exception handlers by hijacking IVPR. IVPR must + * be 16-bit aligned, so we need a 64KB allocation. */ + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, + VCPU_SIZE_ORDER); + if (!kvmppc_booke_handlers) + return -ENOMEM; + + /* XXX make sure our handlers are smaller than Linux's */ + + /* Copy our interrupt handlers to match host IVORs. That way we don't + * have to swap the IVORs on every guest/host transition. */ + ivor[0] = mfspr(SPRN_IVOR0); + ivor[1] = mfspr(SPRN_IVOR1); + ivor[2] = mfspr(SPRN_IVOR2); + ivor[3] = mfspr(SPRN_IVOR3); + ivor[4] = mfspr(SPRN_IVOR4); + ivor[5] = mfspr(SPRN_IVOR5); + ivor[6] = mfspr(SPRN_IVOR6); + ivor[7] = mfspr(SPRN_IVOR7); + ivor[8] = mfspr(SPRN_IVOR8); + ivor[9] = mfspr(SPRN_IVOR9); + ivor[10] = mfspr(SPRN_IVOR10); + ivor[11] = mfspr(SPRN_IVOR11); + ivor[12] = mfspr(SPRN_IVOR12); + ivor[13] = mfspr(SPRN_IVOR13); + ivor[14] = mfspr(SPRN_IVOR14); + ivor[15] = mfspr(SPRN_IVOR15); + + for (i = 0; i < 16; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + i * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); +} + +static void __exit kvmppc_booke_exit(void) +{ + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); + kvm_exit(); +} + +module_init(kvmppc_booke_init) +module_exit(kvmppc_booke_exit) diff --git a/trunk/arch/powerpc/kvm/booke_interrupts.S b/trunk/arch/powerpc/kvm/booke_interrupts.S index 084ebcd7dd83..95e165baf85f 100644 --- a/trunk/arch/powerpc/kvm/booke_interrupts.S +++ b/trunk/arch/powerpc/kvm/booke_interrupts.S @@ -107,18 +107,6 @@ _GLOBAL(kvmppc_resume_host) li r6, 1 slw r6, r6, r5 -#ifdef CONFIG_KVM_EXIT_TIMING - /* save exit time */ -1: - mfspr r7, SPRN_TBRU - mfspr r8, SPRN_TBRL - mfspr r9, SPRN_TBRU - cmpw r9, r7 - bne 1b - stw r8, VCPU_TIMING_EXIT_TBL(r4) - stw r9, VCPU_TIMING_EXIT_TBU(r4) -#endif - /* Save the faulting instruction and all GPRs for emulation. */ andi. r7, r6, NEED_INST_MASK beq ..skip_inst_copy @@ -347,6 +335,54 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 + /* Prevent all asynchronous TLB updates. */ + mfmsr r5 + lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h + ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l + andc r6, r5, r6 + mtmsr r6 + + /* Load the guest mappings, leaving the host's "pinned" kernel mappings + * in place. */ + mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ + li r5, PPC44x_TLB_SIZE + lis r5, tlb_44x_hwater@ha + lwz r5, tlb_44x_hwater@l(r5) + mtctr r5 + addi r9, r4, VCPU_SHADOW_TLB + addi r5, r4, VCPU_SHADOW_MOD + li r3, 0 +1: + lbzx r7, r3, r5 + cmpwi r7, 0 + beq 3f + + /* Load guest entry. 
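 * Each shadow entry occupies TLBE_BYTES and is consumed at fixed
 * offsets below (0 = tid, 4 = word0, 8 = word1, 12 = word2). A C view
 * of the layout this loop assumes (a sketch; the real struct tlbe
 * definition is not part of this hunk):
 *
 *   struct tlbe {
 *           u32 tid;     // loaded into MMUCR[STID]
 *           u32 word0;   // PAGEID
 *           u32 word1;   // XLAT
 *           u32 word2;   // ATTRIB
 *   };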
*/ + mulli r11, r3, TLBE_BYTES + add r11, r11, r9 + lwz r7, 0(r11) + mtspr SPRN_MMUCR, r7 + lwz r7, 4(r11) + tlbwe r7, r3, PPC44x_TLB_PAGEID + lwz r7, 8(r11) + tlbwe r7, r3, PPC44x_TLB_XLAT + lwz r7, 12(r11) + tlbwe r7, r3, PPC44x_TLB_ATTRIB +3: + addi r3, r3, 1 /* Increment index. */ + bdnz 1b + + mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ + + /* Clear bitmap of modified TLB entries */ + li r5, PPC44x_TLB_SIZE>>2 + mtctr r5 + addi r5, r4, VCPU_SHADOW_MOD - 4 + li r6, 0 +1: + stwu r6, 4(r5) + bdnz 1b + iccci 0, 0 /* XXX hack */ /* Load some guest volatiles. */ @@ -387,18 +423,6 @@ lightweight_exit: lwz r3, VCPU_SPRG7(r4) mtspr SPRN_SPRG7, r3 -#ifdef CONFIG_KVM_EXIT_TIMING - /* save enter time */ -1: - mfspr r6, SPRN_TBRU - mfspr r7, SPRN_TBRL - mfspr r8, SPRN_TBRU - cmpw r8, r6 - bne 1b - stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4) - stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) -#endif - /* Finish loading guest volatiles and jump to guest. */ lwz r3, VCPU_CTR(r4) mtctr r3 diff --git a/trunk/arch/powerpc/kvm/emulate.c b/trunk/arch/powerpc/kvm/emulate.c index d1d38daa93fb..0fce4fbdc20d 100644 --- a/trunk/arch/powerpc/kvm/emulate.c +++ b/trunk/arch/powerpc/kvm/emulate.c @@ -23,14 +23,161 @@ #include #include -#include +#include +#include #include #include #include -#include -#include "timing.h" -void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) +#include "44x_tlb.h" + +/* Instruction decoding */ +static inline unsigned int get_op(u32 inst) +{ + return inst >> 26; +} + +static inline unsigned int get_xop(u32 inst) +{ + return (inst >> 1) & 0x3ff; +} + +static inline unsigned int get_sprn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_dcrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_rt(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_rs(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_ra(u32 inst) +{ + return (inst >> 16) & 0x1f; +} + +static inline unsigned int get_rb(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_rc(u32 inst) +{ + return inst & 0x1; +} + +static inline unsigned int get_ws(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_d(u32 inst) +{ + return inst & 0xffff; +} + +static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct tlbe *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) +{ + u64 eaddr; + u64 raddr; + u64 asid; + u32 flags; + struct tlbe *tlbe; + unsigned int ra; + unsigned int rs; + unsigned int ws; + unsigned int index; + + ra = get_ra(inst); + rs = get_rs(inst); + ws = get_ws(inst); + + index = vcpu->arch.gpr[ra]; + if (index > PPC44x_TLB_SIZE) { + printk("%s: index %d\n", __func__, index); + kvmppc_dump_vcpu(vcpu); + return EMULATE_FAIL; + } + + tlbe = &vcpu->arch.guest_tlb[index]; + + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. 
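 * (tlbwe lands here from the opcode 31 dispatch in
 * kvmppc_emulate_instruction: get_op(inst) == 31 and
 * get_xop(inst) == 978, with operands extracted as
 *
 *   ra = get_ra(inst); rs = get_rs(inst); ws = get_ws(inst);
 *
 * where ws selects which word of the entry is written below.)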
*/ + if (tlbe->word0 & PPC44x_TLB_VALID) { + eaddr = get_tlb_eaddr(tlbe); + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid); + } + + switch (ws) { + case PPC44x_TLB_PAGEID: + tlbe->tid = vcpu->arch.mmucr & 0xff; + tlbe->word0 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_XLAT: + tlbe->word1 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_ATTRIB: + tlbe->word2 = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + if (tlbe_is_host_safe(vcpu, tlbe)) { + eaddr = get_tlb_eaddr(tlbe); + raddr = get_tlb_raddr(tlbe); + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + flags = tlbe->word2 & 0xffff; + + /* Create a 4KB mapping on the host. If the guest wanted a + * large page, only the first 4KB is mapped here and the rest + * are mapped on the fly. */ + kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); + } + + KVMTRACE_5D(GTLB_WRITE, vcpu, index, + tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2, + handler); + + return EMULATE_DONE; +} + +static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { if (vcpu->arch.tcr & TCR_DIE) { /* The decrementer ticks at the same rate as the timebase, so @@ -46,6 +193,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + /* XXX to do: * lhax * lhaux @@ -60,30 +213,40 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) * * XXX is_bigendian should depend on MMU mapping or MSR[LE] */ -/* XXX Should probably auto-generate instruction decoding for a particular core - * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst = vcpu->arch.last_inst; u32 ea; int ra; int rb; + int rc; int rs; int rt; int sprn; + int dcrn; enum emulation_result emulated = EMULATE_DONE; int advance = 1; - /* this default type might be overwritten by subcategories */ - kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - switch (get_op(inst)) { - case 3: /* trap */ - vcpu->arch.esr |= ESR_PTR; - kvmppc_core_queue_program(vcpu); + case 3: /* trap */ + printk("trap!\n"); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); advance = 0; break; + case 19: + switch (get_xop(inst)) { + case 50: /* rfi */ + kvmppc_emul_rfi(vcpu); + advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + case 31: switch (get_xop(inst)) { @@ -92,11 +255,27 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; + case 83: /* mfmsr */ + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + break; + case 87: /* lbzx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; + case 131: /* wrtee */ + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + break; + + case 146: /* mtmsr */ + rs = get_rs(inst); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + case 151: /* stwx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -104,6 +283,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 1); break; + case 163: /* wrteei */ + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + break; + case 215: /* stbx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -144,6 +328,42 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 
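/* The DCR and SPR numbers handled below arrive split across two 5-bit
 * instruction fields; get_dcrn() and get_sprn() above reassemble them:
 *
 *   n = ((inst >> 16) & 0x1f)    // low five bits
 *     | ((inst >> 6) & 0x3e0);   // high five bits
 *
 * e.g. SRR0 (SPR 26) encodes with low field 26 and high field 0. */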
vcpu->arch.gpr[ra] = ea; break; + case 323: /* mfdcr */ + dcrn = get_dcrn(inst); + rt = get_rt(inst); + + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); + local_irq_enable(); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = 0; + run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + emulated = EMULATE_DO_DCR; + } + + break; + case 339: /* mfspr */ sprn = get_sprn(inst); rt = get_rt(inst); @@ -153,8 +373,26 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; case SPRN_SRR1: vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; + case SPRN_MMUCR: + vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_CCR0: + vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; + case SPRN_CCR1: + vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; case SPRN_PVR: vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. @@ -175,12 +413,42 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Note: SPRG4-7 are user-readable, so we don't get * a trap. 
*/ + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; + case SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; + default: - emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); - if (emulated == EMULATE_FAIL) { - printk("mfspr: unknown spr %x\n", sprn); - vcpu->arch.gpr[rt] = 0; - } + printk("mfspr: unknown spr %x\n", sprn); + vcpu->arch.gpr[rt] = 0; break; } break; @@ -210,6 +478,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; + case 451: /* mtdcr */ + dcrn = get_dcrn(inst); + rs = get_rs(inst); + + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = vcpu->arch.gpr[rs]; + run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + emulated = EMULATE_DO_DCR; + } + + break; + case 467: /* mtspr */ sprn = get_sprn(inst); rs = get_rs(inst); @@ -218,6 +505,22 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; case SPRN_SRR1: vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; + case SPRN_PID: + kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; + case SPRN_CCR0: + vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; + case SPRN_CCR1: + vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; /* XXX We need to context-switch the timebase for * watchdog and FIT. */ @@ -229,6 +532,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_emulate_dec(vcpu); break; + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + case SPRN_SPRG0: vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; case SPRN_SPRG1: @@ -238,10 +549,56 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_SPRG3: vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. 
*/ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR0: + vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR1: + vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR2: + vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR3: + vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR4: + vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR5: + vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR6: + vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR7: + vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR8: + vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR9: + vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR10: + vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR11: + vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR12: + vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR13: + vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR14: + vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR15: + vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; + default: - emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); - if (emulated == EMULATE_FAIL) - printk("mtspr: unknown spr %x\n", sprn); + printk("mtspr: unknown spr %x\n", sprn); + emulated = EMULATE_FAIL; break; } break; @@ -272,6 +629,36 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 0); break; + case 978: /* tlbwe */ + emulated = kvmppc_emul_tlbwe(vcpu, inst); + break; + + case 914: { /* tlbsx */ + int index; + unsigned int as = get_mmucr_sts(vcpu); + unsigned int pid = get_mmucr_stid(vcpu); + + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + rc = get_rc(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); + if (rc) { + if (index < 0) + vcpu->arch.cr &= ~0x20000000; + else + vcpu->arch.cr |= 0x20000000; + } + vcpu->arch.gpr[rt] = index; + + } + break; + case 790: /* lhbrx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); @@ -287,9 +674,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 2, 0); break; + case 966: /* iccci */ + break; + default: - /* Attempt core-specific emulation below. */ + printk("unknown: op %d xop %d\n", get_op(inst), + get_xop(inst)); emulated = EMULATE_FAIL; + break; } break; @@ -372,19 +764,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: + printk("unknown op %d\n", get_op(inst)); emulated = EMULATE_FAIL; + break; } - if (emulated == EMULATE_FAIL) { - emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); - if (emulated == EMULATE_FAIL) { - advance = 0; - printk(KERN_ERR "Couldn't emulate instruction 0x%08x " - "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); - } - } - - KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit); + KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); if (advance) vcpu->arch.pc += 4; /* Advance past emulated instruction. 
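
A note on where EMULATE_DO_DCR lands: the run->dcr fields filled in above reach the VMM as a KVM_EXIT_DCR, and for reads the kernel completes the GPR load from run->dcr.data on the next KVM_RUN (see kvmppc_complete_dcr_load in the powerpc.c hunk below). A hypothetical VMM-side sketch; emulate_dcr_read/write stand in for whatever device model the VMM provides:

	extern void emulate_dcr_write(unsigned int dcrn, unsigned int data);	/* placeholder */
	extern unsigned int emulate_dcr_read(unsigned int dcrn);		/* placeholder */

	/* Userspace side of the DCR round trip (sketch, not part of the patch). */
	static void handle_dcr_exit(struct kvm_run *run)
	{
		if (run->dcr.is_write)
			emulate_dcr_write(run->dcr.dcrn, run->dcr.data);
		else
			run->dcr.data = emulate_dcr_read(run->dcr.dcrn);
		/* The next KVM_RUN ioctl re-enters the guest; for a read the
		 * kernel copies run->dcr.data into the GPR recorded in
		 * vcpu->arch.io_gpr. */
	}
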
*/ diff --git a/trunk/arch/powerpc/kvm/powerpc.c b/trunk/arch/powerpc/kvm/powerpc.c index 2822c8ccfaaf..8bef0efcdfe1 100644 --- a/trunk/arch/powerpc/kvm/powerpc.c +++ b/trunk/arch/powerpc/kvm/powerpc.c @@ -28,9 +28,9 @@ #include #include #include -#include "timing.h" #include "../mm/mmu_decl.h" + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; @@ -99,7 +99,14 @@ void kvm_arch_hardware_unsetup(void) void kvm_arch_check_processor_compat(void *rtn) { - *(int *)rtn = kvmppc_core_check_processor_compat(); + int r; + + if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + r = 0; + else + r = -ENOTSUPP; + + *(int *)rtn = r; } struct kvm *kvm_arch_create_vm(void) @@ -137,6 +144,9 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { + case KVM_CAP_USER_MEMORY: + r = 1; + break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; @@ -169,15 +179,30 @@ void kvm_arch_flush_shadow(struct kvm *kvm) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; - vcpu = kvmppc_core_vcpu_create(kvm, id); - kvmppc_create_vcpu_debugfs(vcpu, id); + int err; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + return vcpu; + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(err); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { - kvmppc_remove_vcpu_debugfs(vcpu); - kvmppc_core_vcpu_free(vcpu); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -187,14 +212,16 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvmppc_core_pending_dec(vcpu); + unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; + + return test_bit(priority, &vcpu->arch.pending_exceptions); } static void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_core_queue_dec(vcpu); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); @@ -215,25 +242,96 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_core_destroy_mmu(vcpu); } +/* Note: clearing MSR[DE] just means that the debug interrupt will not be + * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. + * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt + * will be delivered as an "imprecise debug event" (which is indicated by + * DBSR[IDE]. + */ +static void kvmppc_disable_debug_interrupts(void) +{ + mtmsr(mfmsr() & ~MSR_DE); +} + +static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu) +{ + kvmppc_disable_debug_interrupts(); + + mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); + mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); + mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); + mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); + mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); + mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); + mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); + mtmsr(vcpu->arch.host_msr); +} + +static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu) +{ + struct kvm_guest_debug *dbg = &vcpu->guest_debug; + u32 dbcr0 = 0; + + vcpu->arch.host_msr = mfmsr(); + kvmppc_disable_debug_interrupts(); + + /* Save host debug register state. 
*/ + vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); + vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); + vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); + vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); + vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); + vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); + vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); + + /* set registers up for guest */ + + if (dbg->bp[0]) { + mtspr(SPRN_IAC1, dbg->bp[0]); + dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; + } + if (dbg->bp[1]) { + mtspr(SPRN_IAC2, dbg->bp[1]); + dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; + } + if (dbg->bp[2]) { + mtspr(SPRN_IAC3, dbg->bp[2]); + dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; + } + if (dbg->bp[3]) { + mtspr(SPRN_IAC4, dbg->bp[3]); + dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; + } + + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBCR1, 0); + mtspr(SPRN_DBCR2, 0); +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + int i; + if (vcpu->guest_debug.enabled) - kvmppc_core_load_guest_debugstate(vcpu); + kvmppc_load_guest_debug_registers(vcpu); - kvmppc_core_vcpu_load(vcpu, cpu); + /* Mark every guest entry in the shadow TLB entry modified, so that they + * will all be reloaded on the next vcpu run (instead of being + * demand-faulted). */ + for (i = 0; i <= tlb_44x_hwater; i++) + kvmppc_tlbe_set_modified(vcpu, i); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { if (vcpu->guest_debug.enabled) - kvmppc_core_load_host_debugstate(vcpu); + kvmppc_restore_host_debug_state(vcpu); /* Don't leave guest TLB entries resident when being de-scheduled. */ /* XXX It would be nice to differentiate between heavyweight exit and * sched_out here, since we could avoid the TLB flush for heavyweight * exits. */ _tlbil_all(); - kvmppc_core_vcpu_put(vcpu); } int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, @@ -257,14 +355,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; *gpr = run->dcr.data; } static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; if (run->mmio.len > sizeof(*gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -362,7 +460,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->arch.dcr_needed = 0; } - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_check_and_deliver_interrupts(vcpu); local_irq_disable(); kvm_guest_enter(); @@ -380,7 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_core_queue_external(vcpu, irq); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); diff --git a/trunk/arch/powerpc/kvm/timing.c b/trunk/arch/powerpc/kvm/timing.c deleted file mode 100644 index 47ee603f558e..000000000000 --- a/trunk/arch/powerpc/kvm/timing.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - * Christian Ehrhardt - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "timing.h" - -void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) -{ - int i; - - /* pause guest execution to avoid concurrent updates */ - local_irq_disable(); - mutex_lock(&vcpu->mutex); - - vcpu->arch.last_exit_type = 0xDEAD; - for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { - vcpu->arch.timing_count_type[i] = 0; - vcpu->arch.timing_max_duration[i] = 0; - vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF; - vcpu->arch.timing_sum_duration[i] = 0; - vcpu->arch.timing_sum_quad_duration[i] = 0; - } - vcpu->arch.timing_last_exit = 0; - vcpu->arch.timing_exit.tv64 = 0; - vcpu->arch.timing_last_enter.tv64 = 0; - - mutex_unlock(&vcpu->mutex); - local_irq_enable(); -} - -static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) -{ - u64 old; - - do_div(duration, tb_ticks_per_usec); - if (unlikely(duration > 0xFFFFFFFF)) { - printk(KERN_ERR"%s - duration too big -> overflow" - " duration %lld type %d exit #%d\n", - __func__, duration, type, - vcpu->arch.timing_count_type[type]); - return; - } - - vcpu->arch.timing_count_type[type]++; - - /* sum */ - old = vcpu->arch.timing_sum_duration[type]; - vcpu->arch.timing_sum_duration[type] += duration; - if (unlikely(old > vcpu->arch.timing_sum_duration[type])) { - printk(KERN_ERR"%s - wrap adding sum of durations" - " old %lld new %lld type %d exit # of type %d\n", - __func__, old, vcpu->arch.timing_sum_duration[type], - type, vcpu->arch.timing_count_type[type]); - } - - /* square sum */ - old = vcpu->arch.timing_sum_quad_duration[type]; - vcpu->arch.timing_sum_quad_duration[type] += (duration*duration); - if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) { - printk(KERN_ERR"%s - wrap adding sum of squared durations" - " old %lld new %lld type %d exit # of type %d\n", - __func__, old, - vcpu->arch.timing_sum_quad_duration[type], - type, vcpu->arch.timing_count_type[type]); - } - - /* set min/max */ - if (unlikely(duration < vcpu->arch.timing_min_duration[type])) - vcpu->arch.timing_min_duration[type] = duration; - if (unlikely(duration > vcpu->arch.timing_max_duration[type])) - vcpu->arch.timing_max_duration[type] = duration; -} - -void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) -{ - u64 exit = vcpu->arch.timing_last_exit; - u64 enter = vcpu->arch.timing_last_enter.tv64; - - /* save exit time, used next exit when the reenter time is known */ - vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64; - - if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0)) - return; /* skip incomplete cycle (e.g. 
after reset) */ - - /* update statistics for average and standard deviation */ - add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type); - /* enter -> timing_last_exit is time spent in guest - log this too */ - add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter), - TIMEINGUEST); -} - -static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { - [MMIO_EXITS] = "MMIO", - [DCR_EXITS] = "DCR", - [SIGNAL_EXITS] = "SIGNAL", - [ITLB_REAL_MISS_EXITS] = "ITLBREAL", - [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", - [DTLB_REAL_MISS_EXITS] = "DTLBREAL", - [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT", - [SYSCALL_EXITS] = "SYSCALL", - [ISI_EXITS] = "ISI", - [DSI_EXITS] = "DSI", - [EMULATED_INST_EXITS] = "EMULINST", - [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT", - [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE", - [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR", - [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR", - [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR", - [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR", - [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX", - [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE", - [EMULATED_RFI_EXITS] = "EMUL_RFI", - [DEC_EXITS] = "DEC", - [EXT_INTR_EXITS] = "EXTINT", - [HALT_WAKEUP] = "HALT", - [USR_PR_INST] = "USR_PR_INST", - [FP_UNAVAIL] = "FP_UNAVAIL", - [DEBUG_EXITS] = "DEBUG", - [TIMEINGUEST] = "TIMEINGUEST" -}; - -static int kvmppc_exit_timing_show(struct seq_file *m, void *private) -{ - struct kvm_vcpu *vcpu = m->private; - int i; - - seq_printf(m, "%s", "type count min max sum sum_squared\n"); - - for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { - seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", - kvm_exit_names[i], - vcpu->arch.timing_count_type[i], - vcpu->arch.timing_min_duration[i], - vcpu->arch.timing_max_duration[i], - vcpu->arch.timing_sum_duration[i], - vcpu->arch.timing_sum_quad_duration[i]); - } - return 0; -} - -/* Write 'c' to clear the timing statistics. */ -static ssize_t kvmppc_exit_timing_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - int err = -EINVAL; - char c; - - if (count > 1) { - goto done; - } - - if (get_user(c, user_buf)) { - err = -EFAULT; - goto done; - } - - if (c == 'c') { - struct seq_file *seqf = (struct seq_file *)file->private_data; - struct kvm_vcpu *vcpu = seqf->private; - /* Write does not affect our buffers previously generated with - * show. 
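
For completeness, the per-exit-type triple that the timing.c file (deleted here) exported is enough to recover mean and standard deviation offline: with n = count, S = sum and Q = sum_squared, mean = S/n and stddev = sqrt(Q/n - (S/n)^2). A small host-side sketch, assuming the values were read from the vmN_vcpuN_timing debugfs file:

	#include <math.h>

	/* Population mean/stddev from the (count, sum, sum_squared) columns
	 * printed per exit type by the debugfs file above. */
	static void exit_timing_stats(unsigned long long n,
				      unsigned long long sum,
				      unsigned long long sum_sq,
				      double *mean, double *stddev)
	{
		*mean = (double)sum / n;
		*stddev = sqrt((double)sum_sq / n - *mean * *mean);
	}
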
seq_file is locked here to prevent races of init with - * a show call */ - mutex_lock(&seqf->lock); - kvmppc_init_timing_stats(vcpu); - mutex_unlock(&seqf->lock); - err = count; - } - -done: - return err; -} - -static int kvmppc_exit_timing_open(struct inode *inode, struct file *file) -{ - return single_open(file, kvmppc_exit_timing_show, inode->i_private); -} - -static struct file_operations kvmppc_exit_timing_fops = { - .owner = THIS_MODULE, - .open = kvmppc_exit_timing_open, - .read = seq_read, - .write = kvmppc_exit_timing_write, - .llseek = seq_lseek, - .release = single_release, -}; - -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) -{ - static char dbg_fname[50]; - struct dentry *debugfs_file; - - snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", - current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, - kvm_debugfs_dir, vcpu, - &kvmppc_exit_timing_fops); - - if (!debugfs_file) { - printk(KERN_ERR"%s: error creating debugfs file %s\n", - __func__, dbg_fname); - return; - } - - vcpu->arch.debugfs_exit_timing = debugfs_file; -} - -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.debugfs_exit_timing) { - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; - } -} diff --git a/trunk/arch/powerpc/kvm/timing.h b/trunk/arch/powerpc/kvm/timing.h deleted file mode 100644 index bb13b1f3cd5a..000000000000 --- a/trunk/arch/powerpc/kvm/timing.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 
2008 - * - * Authors: Christian Ehrhardt - */ - -#ifndef __POWERPC_KVM_EXITTIMING_H__ -#define __POWERPC_KVM_EXITTIMING_H__ - -#include -#include - -#ifdef CONFIG_KVM_EXIT_TIMING -void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); -void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); - -static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) -{ - vcpu->arch.last_exit_type = type; -} - -#else -/* if exit timing is not configured there is no need to build the c file */ -static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, - unsigned int id) {} -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} -#endif /* CONFIG_KVM_EXIT_TIMING */ - -/* account the exit in kvm_stats */ -static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) -{ - /* type has to be known at build time for optimization */ - BUILD_BUG_ON(__builtin_constant_p(type)); - switch (type) { - case EXT_INTR_EXITS: - vcpu->stat.ext_intr_exits++; - break; - case DEC_EXITS: - vcpu->stat.dec_exits++; - break; - case EMULATED_INST_EXITS: - vcpu->stat.emulated_inst_exits++; - break; - case DCR_EXITS: - vcpu->stat.dcr_exits++; - break; - case DSI_EXITS: - vcpu->stat.dsi_exits++; - break; - case ISI_EXITS: - vcpu->stat.isi_exits++; - break; - case SYSCALL_EXITS: - vcpu->stat.syscall_exits++; - break; - case DTLB_REAL_MISS_EXITS: - vcpu->stat.dtlb_real_miss_exits++; - break; - case DTLB_VIRT_MISS_EXITS: - vcpu->stat.dtlb_virt_miss_exits++; - break; - case MMIO_EXITS: - vcpu->stat.mmio_exits++; - break; - case ITLB_REAL_MISS_EXITS: - vcpu->stat.itlb_real_miss_exits++; - break; - case ITLB_VIRT_MISS_EXITS: - vcpu->stat.itlb_virt_miss_exits++; - break; - case SIGNAL_EXITS: - vcpu->stat.signal_exits++; - break; - } -} - -/* wrapper to set exit time and account for it in kvm_stats */ -static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type) -{ - kvmppc_set_exit_type(vcpu, type); - kvmppc_account_exit_stat(vcpu, type); -} - -#endif /* __POWERPC_KVM_EXITTIMING_H__ */ diff --git a/trunk/arch/powerpc/platforms/pseries/xics.c b/trunk/arch/powerpc/platforms/pseries/xics.c index 84e058f1e1cc..f7a69021b7bf 100644 --- a/trunk/arch/powerpc/platforms/pseries/xics.c +++ b/trunk/arch/powerpc/platforms/pseries/xics.c @@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) +static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) { unsigned int irq; int status; @@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void) /* Reset affinity to all cpus */ irq_desc[virq].affinity = CPU_MASK_ALL; - desc->chip->set_affinity(virq, cpu_all_mask); + desc->chip->set_affinity(virq, CPU_MASK_ALL); unlock: spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/trunk/arch/powerpc/sysdev/mpic.c b/trunk/arch/powerpc/sysdev/mpic.c index 3e0d89dcdba2..c82babb70074 100644 --- a/trunk/arch/powerpc/sysdev/mpic.c +++ b/trunk/arch/powerpc/sysdev/mpic.c @@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) 
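
The affinity hunks here and below all toggle the same calling convention, and the difference is not cosmetic: a cpumask_t is NR_CPUS bits wide, so at NR_CPUS=4096 the by-value form copies a 512-byte bitmap onto the stack at every call, while the pointer form passes 8 bytes. A standalone illustration; the struct layout is schematic, not the kernel's exact definition:

	#define MY_NR_CPUS 4096

	typedef struct {
		unsigned long bits[MY_NR_CPUS / (8 * sizeof(unsigned long))];
	} my_cpumask_t;				/* 512 bytes at 4096 CPUs */

	/* Reverted convention: the whole bitmap travels by value. */
	static void set_affinity_byval(unsigned int irq, my_cpumask_t mask)
	{
		(void)irq; (void)mask;
	}

	/* Newer convention being backed out: only a pointer is passed. */
	static void set_affinity_byptr(unsigned int irq, const my_cpumask_t *mask)
	{
		(void)irq; (void)mask;
	}
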
+void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) } else { cpumask_t tmp; - cpumask_and(&tmp, cpumask, cpu_online_mask); + cpus_and(tmp, cpumask, cpu_online_map); mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); diff --git a/trunk/arch/powerpc/sysdev/mpic.h b/trunk/arch/powerpc/sysdev/mpic.h index 3cef2af10f42..6209c62a426d 100644 --- a/trunk/arch/powerpc/sysdev/mpic.h +++ b/trunk/arch/powerpc/sysdev/mpic.h @@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic) extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type); extern void mpic_set_vector(unsigned int virq, unsigned int vector); -extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); +extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask); #endif /* _POWERPC_SYSDEV_MPIC_H */ diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig index 19577aeffd7b..8152fefc97b9 100644 --- a/trunk/arch/s390/Kconfig +++ b/trunk/arch/s390/Kconfig @@ -83,7 +83,6 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK - select INIT_ALL_POSSIBLE source "init/Kconfig" diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c index 3ed5c7a83c6c..6fc78541dc57 100644 --- a/trunk/arch/s390/kernel/smp.c +++ b/trunk/arch/s390/kernel/smp.c @@ -55,6 +55,12 @@ struct _lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); +cpumask_t cpu_online_map = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_online_map); + +cpumask_t cpu_possible_map = CPU_MASK_ALL; +EXPORT_SYMBOL(cpu_possible_map); + static struct task_struct *current_set[NR_CPUS]; static u8 smp_cpu_type; diff --git a/trunk/arch/s390/kernel/time.c b/trunk/arch/s390/kernel/time.c index d649600df5b9..5be981a36c3e 100644 --- a/trunk/arch/s390/kernel/time.c +++ b/trunk/arch/s390/kernel/time.c @@ -160,7 +160,7 @@ void init_cpu_timer(void) cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); cd->set_next_event = s390_next_event; cd->set_mode = s390_set_mode; diff --git a/trunk/arch/s390/kvm/kvm-s390.c b/trunk/arch/s390/kvm/kvm-s390.c index be8497186b96..8b00eb2ddf57 100644 --- a/trunk/arch/s390/kvm/kvm-s390.c +++ b/trunk/arch/s390/kvm/kvm-s390.c @@ -113,6 +113,8 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_dev_ioctl_check_extension(long ext) { switch (ext) { + case KVM_CAP_USER_MEMORY: + return 1; default: return 0; } @@ -183,6 +185,8 @@ struct kvm *kvm_arch_create_vm(void) debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); + try_module_get(THIS_MODULE); + return kvm; out_nodbf: free_page((unsigned long)(kvm->arch.sca)); @@ -192,33 +196,13 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(rc); } -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - VCPU_EVENT(vcpu, 3, "%s", "free cpu"); - free_page((unsigned long)(vcpu->arch.sie_block)); - kvm_vcpu_uninit(vcpu); - kfree(vcpu); -} - -static void kvm_free_vcpus(struct kvm *kvm) -{ - unsigned int i; - - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - if (kvm->vcpus[i]) { - kvm_arch_vcpu_destroy(kvm->vcpus[i]); - kvm->vcpus[i] = NULL; - } - } -} - void kvm_arch_destroy_vm(struct kvm *kvm) { - kvm_free_vcpus(kvm); + debug_unregister(kvm->arch.dbf); kvm_free_physmem(kvm); free_page((unsigned 
long)(kvm->arch.sca)); - debug_unregister(kvm->arch.dbf); kfree(kvm); + module_put(THIS_MODULE); } /* Section: vcpu related */ @@ -229,7 +213,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - /* Nothing todo */ + /* kvm common code refers to this, but does'nt call it */ + BUG(); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -323,6 +308,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); + try_module_get(THIS_MODULE); + return vcpu; out_free_cpu: kfree(vcpu); @@ -330,6 +317,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, return ERR_PTR(rc); } +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); + free_page((unsigned long)(vcpu->arch.sie_block)); + kfree(vcpu); + module_put(THIS_MODULE); +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ diff --git a/trunk/arch/sh/include/asm/smp.h b/trunk/arch/sh/include/asm/smp.h index c24e9c6a1736..85b660c17eb0 100644 --- a/trunk/arch/sh/include/asm/smp.h +++ b/trunk/arch/sh/include/asm/smp.h @@ -31,7 +31,7 @@ enum { }; void smp_message_recv(unsigned int msg); -void smp_timer_broadcast(const struct cpumask *mask); +void smp_timer_broadcast(cpumask_t mask); void local_timer_interrupt(void); void local_timer_setup(unsigned int cpu); diff --git a/trunk/arch/sh/include/asm/topology.h b/trunk/arch/sh/include/asm/topology.h index 279d9cc4a007..95f0085e098a 100644 --- a/trunk/arch/sh/include/asm/topology.h +++ b/trunk/arch/sh/include/asm/topology.h @@ -5,6 +5,7 @@ /* sched_domains SD_NODE_INIT for sh machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/sh/kernel/smp.c b/trunk/arch/sh/kernel/smp.c index 8f4027412614..3c5ad1660bbc 100644 --- a/trunk/arch/sh/kernel/smp.c +++ b/trunk/arch/sh/kernel/smp.c @@ -31,6 +31,12 @@ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + +cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); + static inline void __init smp_store_cpu_info(unsigned int cpu) { struct sh_cpuinfo *c = cpu_data + cpu; @@ -184,11 +190,11 @@ void arch_send_call_function_single_ipi(int cpu) plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE); } -void smp_timer_broadcast(const struct cpumask *mask) +void smp_timer_broadcast(cpumask_t mask) { int cpu; - for_each_cpu(cpu, mask) + for_each_cpu_mask(cpu, mask) plat_send_ipi(cpu, SMP_MSG_TIMER); } diff --git a/trunk/arch/sh/kernel/timers/timer-broadcast.c b/trunk/arch/sh/kernel/timers/timer-broadcast.c index 96e8eaea1e62..c2317635230f 100644 --- a/trunk/arch/sh/kernel/timers/timer-broadcast.c +++ b/trunk/arch/sh/kernel/timers/timer-broadcast.c @@ -51,7 +51,7 @@ void __cpuinit local_timer_setup(unsigned int cpu) clk->mult = 1; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; - clk->cpumask = cpumask_of(cpu); + clk->cpumask = cpumask_of_cpu(cpu); clockevents_register_device(clk); } diff --git a/trunk/arch/sh/kernel/timers/timer-tmu.c b/trunk/arch/sh/kernel/timers/timer-tmu.c index 0db3f9510336..3c61ddd4d43e 100644 --- a/trunk/arch/sh/kernel/timers/timer-tmu.c +++ b/trunk/arch/sh/kernel/timers/timer-tmu.c @@ -263,7 +263,7 @@ static int tmu_timer_init(void) 
tmu0_clockevent.min_delta_ns = clockevent_delta2ns(1, &tmu0_clockevent); - tmu0_clockevent.cpumask = cpumask_of(0); + tmu0_clockevent.cpumask = cpumask_of_cpu(0); clockevents_register_device(&tmu0_clockevent); diff --git a/trunk/arch/sparc/include/asm/smp_32.h b/trunk/arch/sparc/include/asm/smp_32.h index 8408d9d2a662..a8180e546a48 100644 --- a/trunk/arch/sparc/include/asm/smp_32.h +++ b/trunk/arch/sparc/include/asm/smp_32.h @@ -29,6 +29,8 @@ */ extern unsigned char boot_cpu_id; +extern cpumask_t phys_cpu_present_map; +#define cpu_possible_map phys_cpu_present_map typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/trunk/arch/sparc/kernel/irq_64.c b/trunk/arch/sparc/kernel/irq_64.c index cab8e0286871..a3ea2bcb95de 100644 --- a/trunk/arch/sparc/kernel/irq_64.c +++ b/trunk/arch/sparc/kernel/irq_64.c @@ -312,8 +312,7 @@ static void sun4u_irq_enable(unsigned int virt_irq) } } -static void sun4u_set_affinity(unsigned int virt_irq, - const struct cpumask *mask) +static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) { sun4u_irq_enable(virt_irq); } @@ -363,8 +362,7 @@ static void sun4v_irq_enable(unsigned int virt_irq) ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, - const struct cpumask *mask) +static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; unsigned long cpuid = irq_choose_cpu(virt_irq); @@ -431,8 +429,7 @@ static void sun4v_virq_enable(unsigned int virt_irq) dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, - const struct cpumask *mask) +static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) { unsigned long cpuid, dev_handle, dev_ino; int err; @@ -854,7 +851,7 @@ void fixup_irqs(void) !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - &irq_desc[irq].affinity); + irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } diff --git a/trunk/arch/sparc/kernel/of_device_64.c b/trunk/arch/sparc/kernel/of_device_64.c index 322046cdf85f..46e231f7c5ce 100644 --- a/trunk/arch/sparc/kernel/of_device_64.c +++ b/trunk/arch/sparc/kernel/of_device_64.c @@ -780,7 +780,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op, if (nid != -1) { cpumask_t numa_mask = node_to_cpumask(nid); - irq_set_affinity(irq, &numa_mask); + irq_set_affinity(irq, numa_mask); } return irq; diff --git a/trunk/arch/sparc/kernel/pci_msi.c b/trunk/arch/sparc/kernel/pci_msi.c index 0d0cd815e83e..2e680f34f727 100644 --- a/trunk/arch/sparc/kernel/pci_msi.c +++ b/trunk/arch/sparc/kernel/pci_msi.c @@ -288,7 +288,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, if (nid != -1) { cpumask_t numa_mask = node_to_cpumask(nid); - irq_set_affinity(irq, &numa_mask); + irq_set_affinity(irq, numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, "MSIQ", diff --git a/trunk/arch/sparc/kernel/smp_32.c b/trunk/arch/sparc/kernel/smp_32.c index 1e5ac4e282e1..e396c1f17a92 100644 --- a/trunk/arch/sparc/kernel/smp_32.c +++ b/trunk/arch/sparc/kernel/smp_32.c @@ -39,6 +39,8 @@ volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; unsigned char boot_cpu_id = 0; unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t phys_cpu_present_map = CPU_MASK_NONE; cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* The only guaranteed locking 
primitive available on all Sparc @@ -332,7 +334,7 @@ void __init smp_setup_cpu_possible_map(void) instance = 0; while (!cpu_find_by_instance(instance, NULL, &mid)) { if (mid < NR_CPUS) { - cpu_set(mid, cpu_possible_map); + cpu_set(mid, phys_cpu_present_map); cpu_set(mid, cpu_present_map); } instance++; @@ -352,7 +354,7 @@ void __init smp_prepare_boot_cpu(void) current_thread_info()->cpu = cpuid; cpu_set(cpuid, cpu_online_map); - cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, phys_cpu_present_map); } int __cpuinit __cpu_up(unsigned int cpu) diff --git a/trunk/arch/sparc/kernel/smp_64.c b/trunk/arch/sparc/kernel/smp_64.c index 46329799f346..bfe99d82d458 100644 --- a/trunk/arch/sparc/kernel/smp_64.c +++ b/trunk/arch/sparc/kernel/smp_64.c @@ -49,10 +49,14 @@ int sparc64_multi_core __read_mostly; +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; +cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +EXPORT_SYMBOL(cpu_possible_map); +EXPORT_SYMBOL(cpu_online_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); diff --git a/trunk/arch/sparc/kernel/sparc_ksyms_32.c b/trunk/arch/sparc/kernel/sparc_ksyms_32.c index e1e97639231b..a4d45fc29b21 100644 --- a/trunk/arch/sparc/kernel/sparc_ksyms_32.c +++ b/trunk/arch/sparc/kernel/sparc_ksyms_32.c @@ -112,6 +112,10 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data); #ifdef CONFIG_SMP /* IRQ implementation. */ EXPORT_SYMBOL(synchronize_irq); + +/* CPU online map and active count. */ +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(phys_cpu_present_map); #endif EXPORT_SYMBOL(__udelay); diff --git a/trunk/arch/sparc/kernel/time_64.c b/trunk/arch/sparc/kernel/time_64.c index 9df8f095a8b1..141da3759091 100644 --- a/trunk/arch/sparc/kernel/time_64.c +++ b/trunk/arch/sparc/kernel/time_64.c @@ -763,7 +763,7 @@ void __devinit setup_sparc64_timer(void) sevt = &__get_cpu_var(sparc64_events); memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); - sevt->cpumask = cpumask_of(smp_processor_id()); + sevt->cpumask = cpumask_of_cpu(smp_processor_id()); clockevents_register_device(sevt); } diff --git a/trunk/arch/um/kernel/smp.c b/trunk/arch/um/kernel/smp.c index 98351c78bc81..045772142844 100644 --- a/trunk/arch/um/kernel/smp.c +++ b/trunk/arch/um/kernel/smp.c @@ -25,6 +25,13 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #include "irq_user.h" #include "os.h" +/* CPU online map, set by smp_boot_cpus */ +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t cpu_possible_map = CPU_MASK_NONE; + +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); + /* Per CPU bogomips and other parameters * The only piece used here is the ipi pipe, which is set before SMP is * started and never changed. 
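
The clockevent hunks in the s390, sh and sparc files above (and the um file below) follow one pattern: the device's cpumask goes back to being stored by value via cpumask_of_cpu() rather than pointed at via cpumask_of(). A minimal registration sketch in the reverted, 2.6.28-era style; the device fields are abbreviated:

	static void __cpuinit register_local_clockevent(struct clock_event_device *cd,
							unsigned int cpu)
	{
		cd->name = "local-timer";
		cd->rating = 250;
		/* Reverted API: .cpumask holds the mask by value. */
		cd->cpumask = cpumask_of_cpu(cpu);
		clockevents_register_device(cd);
	}
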
diff --git a/trunk/arch/um/kernel/time.c b/trunk/arch/um/kernel/time.c index b13a87a3ec95..47f04f4a3464 100644 --- a/trunk/arch/um/kernel/time.c +++ b/trunk/arch/um/kernel/time.c @@ -50,7 +50,7 @@ static int itimer_next_event(unsigned long delta, static struct clock_event_device itimer_clockevent = { .name = "itimer", .rating = 250, - .cpumask = cpu_all_mask, + .cpumask = CPU_MASK_ALL, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = itimer_set_mode, .set_next_event = itimer_next_event, diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 249d1e0824b5..0f44add3e0b7 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -601,20 +601,19 @@ config IOMMU_HELPER config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL - select CPUMASK_OFFSTACK + depends on X86_64 && SMP && BROKEN default n help Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. config NR_CPUS - int "Maximum number of CPUs" if SMP && !MAXSMP - range 2 512 if SMP && !MAXSMP - default "1" if !SMP + int "Maximum number of CPUs (2-512)" if !MAXSMP + range 2 512 + depends on SMP default "4096" if MAXSMP - default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) - default "8" if SMP + default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 + default "8" help This allows you to specify the maximum number of CPUs which this kernel will support. The maximum supported value is 512 and the diff --git a/trunk/arch/x86/ia32/ia32_signal.c b/trunk/arch/x86/ia32/ia32_signal.c index 9dabd00e9805..b195f85526e3 100644 --- a/trunk/arch/x86/ia32/ia32_signal.c +++ b/trunk/arch/x86/ia32/ia32_signal.c @@ -24,14 +24,15 @@ #include #include #include +#include #include #include #include #include #include #include + #include -#include #define DEBUG_SIG 0 diff --git a/trunk/arch/x86/ia32/ipc32.c b/trunk/arch/x86/ia32/ipc32.c index 29cdcd02ead3..d21991ce606c 100644 --- a/trunk/arch/x86/ia32/ipc32.c +++ b/trunk/arch/x86/ia32/ipc32.c @@ -8,7 +8,6 @@ #include #include #include -#include asmlinkage long sys32_ipc(u32 call, int first, int second, int third, compat_uptr_t ptr, u32 fifth) diff --git a/trunk/arch/x86/ia32/sys_ia32.c b/trunk/arch/x86/ia32/sys_ia32.c index 6c0d7f6231af..2e09dcd3c0a6 100644 --- a/trunk/arch/x86/ia32/sys_ia32.c +++ b/trunk/arch/x86/ia32/sys_ia32.c @@ -44,8 +44,8 @@ #include #include #include +#include #include -#include #define AA(__x) ((unsigned long)(__x)) diff --git a/trunk/arch/x86/include/asm/apic.h b/trunk/arch/x86/include/asm/apic.h index ab1d51a8855e..25caa0738af5 100644 --- a/trunk/arch/x86/include/asm/apic.h +++ b/trunk/arch/x86/include/asm/apic.h @@ -54,6 +54,7 @@ extern int disable_apic; extern int is_vsmp_box(void); extern void xapic_wait_icr_idle(void); extern u32 safe_xapic_wait_icr_idle(void); +extern u64 xapic_icr_read(void); extern void xapic_icr_write(u32, u32); extern int setup_profiling_timer(unsigned int); @@ -92,7 +93,7 @@ static inline u32 native_apic_msr_read(u32 reg) } #ifndef CONFIG_X86_32 -extern int x2apic; +extern int x2apic, x2apic_preenabled; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); diff --git a/trunk/arch/x86/include/asm/bigsmp/apic.h b/trunk/arch/x86/include/asm/bigsmp/apic.h index d8dd9f537911..ce547f24a1cd 100644 --- a/trunk/arch/x86/include/asm/bigsmp/apic.h +++ b/trunk/arch/x86/include/asm/bigsmp/apic.h @@ -9,12 +9,12 @@ static inline int 
apic_id_registered(void) return (1); } -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { #ifdef CONFIG_SMP - return &cpu_online_map; + return cpu_online_map; #else - return &cpumask_of_cpu(0); + return cpumask_of_cpu(0); #endif } @@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < nr_cpu_ids) + if (mps_cpu < NR_CPUS) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); return BAD_APICID; @@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= nr_cpu_ids) + if (cpu >= NR_CPUS) return BAD_APICID; return cpu_physical_id(cpu); } @@ -119,34 +119,16 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; int apicid; - cpu = first_cpu(*cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return cpu_to_logical_apicid(cpu); - - return BAD_APICID; -} - static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/trunk/arch/x86/include/asm/bigsmp/ipi.h b/trunk/arch/x86/include/asm/bigsmp/ipi.h index 27fcd01b3ae6..9404c535b7ec 100644 --- a/trunk/arch/x86/include/asm/bigsmp/ipi.h +++ b/trunk/arch/x86/include/asm/bigsmp/ipi.h @@ -1,22 +1,25 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/trunk/arch/x86/include/asm/desc.h b/trunk/arch/x86/include/asm/desc.h index dc27705f5443..e6b82b17b072 100644 --- a/trunk/arch/x86/include/asm/desc.h +++ b/trunk/arch/x86/include/asm/desc.h @@ -320,14 +320,16 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } +#define SYS_VECTOR_FREE 0 +#define SYS_VECTOR_ALLOCED 1 + extern int first_system_vector; -/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ -extern unsigned long used_vectors[]; +extern char system_vectors[]; static inline void alloc_system_vector(int vector) { - if (!test_bit(vector, used_vectors)) { - set_bit(vector, used_vectors); + if (system_vectors[vector] == 
SYS_VECTOR_FREE) { + system_vectors[vector] = SYS_VECTOR_ALLOCED; if (first_system_vector > vector) first_system_vector = vector; } else diff --git a/trunk/arch/x86/include/asm/efi.h b/trunk/arch/x86/include/asm/efi.h index ca5ffb2856b6..a2e545c91c35 100644 --- a/trunk/arch/x86/include/asm/efi.h +++ b/trunk/arch/x86/include/asm/efi.h @@ -90,7 +90,6 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); #endif /* CONFIG_X86_32 */ -extern int add_efi_memmap; extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/trunk/arch/x86/include/asm/es7000/apic.h b/trunk/arch/x86/include/asm/es7000/apic.h index 51ac1230294e..e24ef876915f 100644 --- a/trunk/arch/x86/include/asm/es7000/apic.h +++ b/trunk/arch/x86/include/asm/es7000/apic.h @@ -9,14 +9,14 @@ static inline int apic_id_registered(void) return (1); } -static inline const cpumask_t *target_cpus_cluster(void) +static inline cpumask_t target_cpus_cluster(void) { - return &CPU_MASK_ALL; + return CPU_MASK_ALL; } -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { - return &cpumask_of_cpu(smp_processor_id()); + return cpumask_of_cpu(smp_processor_id()); } #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) @@ -80,10 +80,9 @@ extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*target_cpus())[0]); + "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) @@ -101,7 +100,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) + else if (mps_cpu < NR_CPUS) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -121,9 +120,9 @@ extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif @@ -147,15 +146,14 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) return (1); } -static inline unsigned int -cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpumask_weight(cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return 0xFF; @@ -163,10 +161,10 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. 
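
The cpus_found/num_bits_set loop above (and its twin in cpu_mask_to_apicid just below) is verifying that every CPU in the mask falls in a single logical APIC cluster before an ID is returned. The same check written as a plain scan, for readers untangling the loop; sketch only, using the helpers already present in these headers:

	/* Returns 1 if all CPUs in the mask share one logical APIC cluster. */
	static int mask_in_one_cluster(cpumask_t mask)
	{
		int cpu, first_apicid = -1;

		for_each_cpu_mask(cpu, mask) {
			int apicid = cpu_to_logical_apicid(cpu);

			if (first_apicid < 0)
				first_apicid = apicid;
			else if (apicid_cluster(apicid) !=
				 apicid_cluster(first_apicid))
				return 0;	/* mask spans clusters */
		}
		return 1;
	}
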
*/ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -181,14 +179,14 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(*cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return cpu_to_logical_apicid(0); @@ -196,52 +194,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. 
- */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -253,8 +209,6 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, } cpu++; } -exit: - free_cpumask_var(cpumask); return apicid; } diff --git a/trunk/arch/x86/include/asm/es7000/ipi.h b/trunk/arch/x86/include/asm/es7000/ipi.h index 7e8ed24d4b8a..632a955fcc0a 100644 --- a/trunk/arch/x86/include/asm/es7000/ipi.h +++ b/trunk/arch/x86/include/asm/es7000/ipi.h @@ -1,22 +1,24 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/trunk/arch/x86/include/asm/genapic_32.h b/trunk/arch/x86/include/asm/genapic_32.h index 746f37a7963a..0ac17d33a8c7 100644 --- a/trunk/arch/x86/include/asm/genapic_32.h +++ b/trunk/arch/x86/include/asm/genapic_32.h @@ -24,7 +24,7 @@ struct genapic { int (*probe)(void); int (*apic_id_registered)(void); - const struct cpumask *(*target_cpus)(void); + cpumask_t (*target_cpus)(void); int int_delivery_mode; int int_dest_mode; int ESR_DISABLE; @@ -57,16 +57,12 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + cpumask_t (*vector_allocation_domain)(int cpu); #ifdef CONFIG_SMP /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); + void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif @@ -118,7 +114,6 @@ struct genapic { APICFUNC(get_apic_id) \ .apic_id_mask = APIC_ID_MASK, \ APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(cpu_mask_to_apicid_and) \ APICFUNC(vector_allocation_domain) \ APICFUNC(acpi_madt_oem_check) \ IPIFUNC(send_IPI_mask) \ diff --git a/trunk/arch/x86/include/asm/genapic_64.h b/trunk/arch/x86/include/asm/genapic_64.h index adf32fb56aa6..2cae011668b7 100644 --- a/trunk/arch/x86/include/asm/genapic_64.h +++ b/trunk/arch/x86/include/asm/genapic_64.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H -#include - /* * Copyright 2004 James Cleverdon, IBM. 
* Subject to the GNU Public License, v.2 @@ -20,20 +18,16 @@ struct genapic { u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); - const struct cpumask *(*target_cpus)(void); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + cpumask_t (*target_cpus)(void); + cpumask_t (*vector_allocation_domain)(int cpu); void (*init_apic_ldr)(void); /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); + void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); /* */ - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/trunk/arch/x86/include/asm/ipi.h b/trunk/arch/x86/include/asm/ipi.h index c745a306f7d3..f89dffb28aa9 100644 --- a/trunk/arch/x86/include/asm/ipi.h +++ b/trunk/arch/x86/include/asm/ipi.h @@ -117,8 +117,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(const struct cpumask *mask, - int vector) +static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; @@ -129,29 +128,11 @@ static inline void send_IPI_mask_sequence(const struct cpumask *mask, * - mbligh */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static inline void send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); -} - #endif /* _ASM_X86_IPI_H */ diff --git a/trunk/arch/x86/include/asm/irq.h b/trunk/arch/x86/include/asm/irq.h index 592688ed04d3..28e409fc73f3 100644 --- a/trunk/arch/x86/include/asm/irq.h +++ b/trunk/arch/x86/include/asm/irq.h @@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include -extern void fixup_irqs(void); +extern void fixup_irqs(cpumask_t map); #endif extern unsigned int do_IRQ(struct pt_regs *regs); @@ -42,6 +42,5 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); -extern int vector_used_by_percpu_irq(unsigned int vector); #endif /* _ASM_X86_IRQ_H */ diff --git a/trunk/arch/x86/include/asm/kvm_host.h b/trunk/arch/x86/include/asm/kvm_host.h index 97215a458e5f..8346be87cfa1 100644 --- a/trunk/arch/x86/include/asm/kvm_host.h +++ b/trunk/arch/x86/include/asm/kvm_host.h @@ -21,7 +21,6 @@ #include #include -#include #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 @@ -87,7 +86,6 @@ #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 -#define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 extern 
spinlock_t kvm_lock; @@ -182,8 +180,6 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; - struct list_head oos_link; - /* * The following two entries are used to key the shadow page in the * hash table. @@ -194,16 +190,13 @@ struct kvm_mmu_page { u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - /* - * One bit set per slot which has memory - * in this shadow page. - */ - DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + unsigned long slot_bitmap; /* One bit set per slot which has memory + * in this shadow page. + */ int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ bool unsync; - bool global; - unsigned int unsync_children; + bool unsync_children; union { u64 *parent_pte; /* !multimapped */ struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ @@ -334,10 +327,8 @@ struct kvm_vcpu_arch { bool nmi_pending; bool nmi_injected; - bool nmi_window_open; - struct mtrr_state_type mtrr_state; - u32 pat; + u64 mtrr[0x100]; }; struct kvm_mem_alias { @@ -359,13 +350,11 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; - struct list_head oos_global_pages; struct dmar_domain *intel_iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; struct hlist_head irq_ack_notifier_list; - int vapics_in_nmi_mode; int round_robin_prev_vcpu; unsigned int tss_addr; @@ -389,7 +378,6 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 mmu_unsync; - u32 mmu_unsync_global; u32 remote_tlb_flush; u32 lpages; }; @@ -409,7 +397,6 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; - u32 request_nmi_exits; u32 irq_exits; u32 host_state_reload; u32 efer_reload; @@ -418,7 +405,6 @@ struct kvm_vcpu_stat { u32 insn_emulation_fail; u32 hypercalls; u32 irq_injections; - u32 nmi_injections; }; struct descriptor_table { @@ -491,7 +477,6 @@ struct kvm_x86_ops { int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); - int (*get_mt_mask_shift)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -505,7 +490,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); @@ -602,14 +587,12 @@ unsigned long segment_base(u16 selector); void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes, - bool guest_initiated); + const u8 *new, int bytes); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); @@ -624,8 +607,6 @@ void kvm_disable_tdp(void); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); -struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); - static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> 
PAGE_SHIFT); @@ -721,6 +702,18 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" +#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" + #define MSR_IA32_TIME_STAMP_COUNTER 0x010 #define TSS_IOPB_BASE_OFFSET 0x66 diff --git a/trunk/arch/x86/include/asm/kvm_x86_emulate.h b/trunk/arch/x86/include/asm/kvm_x86_emulate.h index 6a159732881a..25179a29f208 100644 --- a/trunk/arch/x86/include/asm/kvm_x86_emulate.h +++ b/trunk/arch/x86/include/asm/kvm_x86_emulate.h @@ -123,7 +123,6 @@ struct decode_cache { u8 ad_bytes; u8 rex_prefix; struct operand src; - struct operand src2; struct operand dst; bool has_seg_override; u8 seg_override; @@ -147,18 +146,22 @@ struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; + /* Linear faulting address (if emulating a page-faulting instruction) */ unsigned long eflags; + /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; + u32 cs_base; /* decode cache */ + struct decode_cache decode; }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -167,7 +170,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ /* Host execution mode. 
 */
-#if defined(CONFIG_X86_32)
+#if defined(__i386__)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
 #elif defined(CONFIG_X86_64)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
diff --git a/trunk/arch/x86/include/asm/mach-default/mach_apic.h b/trunk/arch/x86/include/asm/mach-default/mach_apic.h
index cc09cbbee27e..6cb3a467e067 100644
--- a/trunk/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/trunk/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
 #define APIC_DFR_VALUE (APIC_DFR_FLAT)
-static inline const struct cpumask *target_cpus(void)
+static inline cpumask_t target_cpus(void)
 {
 #ifdef CONFIG_SMP
-	return cpu_online_mask;
+	return cpu_online_map;
 #else
-	return cpumask_of(0);
+	return cpumask_of_cpu(0);
 #endif
 }
@@ -28,7 +28,6 @@ static inline const struct cpumask *target_cpus(void)
 #define apic_id_registered (genapic->apic_id_registered)
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
-#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define phys_pkg_id (genapic->phys_pkg_id)
 #define vector_allocation_domain (genapic->vector_allocation_domain)
 #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
@@ -62,19 +61,9 @@ static inline int apic_id_registered(void)
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
-static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 {
-	return cpumask_bits(cpumask)[0];
-}
-
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-						  const struct cpumask *andmask)
-{
-	unsigned long mask1 = cpumask_bits(cpumask)[0];
-	unsigned long mask2 = cpumask_bits(andmask)[0];
-	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
-
-	return (unsigned int)(mask1 & mask2 & mask3);
+	return cpus_addr(cpumask)[0];
 }
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -99,7 +88,7 @@ static inline int apicid_to_node(int logical_apicid)
 #endif
 }
-static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
+static inline cpumask_t vector_allocation_domain(int cpu)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -109,7 +98,7 @@ static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt destination.
*/ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } #endif @@ -141,7 +131,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; diff --git a/trunk/arch/x86/include/asm/mach-default/mach_ipi.h b/trunk/arch/x86/include/asm/mach-default/mach_ipi.h index 191312d155da..fabca01ebacf 100644 --- a/trunk/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/trunk/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,8 +4,7 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_bitmask(cpumask_t mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; @@ -13,27 +12,28 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include #define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_bitmask(mask, vector); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __local_send_IPI_allbutself(int vector) { - if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask_allbutself(cpu_online_mask, vector); - else + if (no_broadcast || vector == NMI_VECTOR) { + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + send_IPI_mask(mask, vector); + } else __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void __local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector); } diff --git a/trunk/arch/x86/include/asm/mach-generic/mach_apic.h b/trunk/arch/x86/include/asm/mach-generic/mach_apic.h index 48553e958ad5..e430f47df667 100644 --- a/trunk/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/trunk/arch/x86/include/asm/mach-generic/mach_apic.h @@ -24,7 +24,6 @@ #define check_phys_apicid_present (genapic->check_phys_apicid_present) #define check_apicid_used (genapic->check_apicid_used) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define vector_allocation_domain (genapic->vector_allocation_domain) #define enable_apic_mode (genapic->enable_apic_mode) #define phys_pkg_id (genapic->phys_pkg_id) diff --git a/trunk/arch/x86/include/asm/mpspec.h b/trunk/arch/x86/include/asm/mpspec.h index 62d14ce3cd00..91885c28f66b 100644 --- a/trunk/arch/x86/include/asm/mpspec.h +++ b/trunk/arch/x86/include/asm/mpspec.h @@ -6,13 +6,13 @@ #include extern int apic_version[MAX_APICS]; -extern int pic_mode; #ifdef CONFIG_X86_32 #include extern unsigned int def_to_bigsmp; extern u8 apicid_2_node[]; +extern int pic_mode; #ifdef CONFIG_X86_NUMAQ extern int mp_bus_id_to_node[MAX_MP_BUSSES]; diff --git a/trunk/arch/x86/include/asm/mtrr.h b/trunk/arch/x86/include/asm/mtrr.h index cb988aab716d..7c1e4258b31e 100644 --- a/trunk/arch/x86/include/asm/mtrr.h +++ b/trunk/arch/x86/include/asm/mtrr.h @@ -57,31 +57,6 @@ struct mtrr_gentry { }; 
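/*
 * Illustrative sketch (assumptions noted): the MTRRphysBase_MSR() and
 * MTRRphysMask_MSR() macros removed from this header just below (this
 * patch re-adds them in arch/x86/kernel/cpu/mtrr/mtrr.h, further down)
 * address the MSR pair of one variable-range MTRR: range 'reg' owns the
 * two consecutive MSRs 0x200 + 2*reg (base) and 0x200 + 2*reg + 1
 * (mask). The rdmsr() helper is assumed from <asm/msr.h>.
 */
static inline void sketch_read_var_mtrr(unsigned int reg,
					u32 *base_lo, u32 *base_hi,
					u32 *mask_lo, u32 *mask_hi)
{
	rdmsr(0x200 + 2 * reg, *base_lo, *base_hi);	/* MTRRphysBase_MSR(reg) */
	rdmsr(0x200 + 2 * reg + 1, *mask_lo, *mask_hi);	/* MTRRphysMask_MSR(reg) */
}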
#endif /* !__i386__ */ -struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; -}; - -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - -#define MTRR_NUM_FIXED_RANGES 88 -#define MTRR_MAX_VAR_RANGES 256 - -struct mtrr_state_type { - struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; - mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - -#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) -#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) - /* These are the various ioctls */ #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) diff --git a/trunk/arch/x86/include/asm/numaq/apic.h b/trunk/arch/x86/include/asm/numaq/apic.h index c80f00d29965..0bf2a06b7a4e 100644 --- a/trunk/arch/x86/include/asm/numaq/apic.h +++ b/trunk/arch/x86/include/asm/numaq/apic.h @@ -7,9 +7,9 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { - return &CPU_MASK_ALL; + return CPU_MASK_ALL; } #define NO_BALANCE_IRQ (1) @@ -122,13 +122,7 @@ static inline void enable_apic_mode(void) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return (int) 0xF; -} - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { return (int) 0xF; } diff --git a/trunk/arch/x86/include/asm/numaq/ipi.h b/trunk/arch/x86/include/asm/numaq/ipi.h index a8374c652778..935588d286cf 100644 --- a/trunk/arch/x86/include/asm/numaq/ipi.h +++ b/trunk/arch/x86/include/asm/numaq/ipi.h @@ -1,22 +1,25 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/trunk/arch/x86/include/asm/smp.h b/trunk/arch/x86/include/asm/smp.h index 830b9fcb6427..d12811ce51d9 100644 --- a/trunk/arch/x86/include/asm/smp.h +++ b/trunk/arch/x86/include/asm/smp.h @@ -60,7 +60,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(const struct cpumask *mask); + void (*send_call_func_ipi)(cpumask_t mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) static inline void arch_send_call_function_ipi(cpumask_t mask) { - smp_ops.send_call_func_ipi(&mask); + smp_ops.send_call_func_ipi(mask); } void cpu_disable_common(void); @@ -138,7 +138,7 @@ void 
native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void native_send_call_func_ipi(const struct cpumask *mask); +void native_send_call_func_ipi(cpumask_t mask); void native_send_call_func_single_ipi(int cpu); extern void prefill_possible_map(void); diff --git a/trunk/arch/x86/include/asm/summit/apic.h b/trunk/arch/x86/include/asm/summit/apic.h index 99327d1be49f..9b3070f1c2ac 100644 --- a/trunk/arch/x86/include/asm/summit/apic.h +++ b/trunk/arch/x86/include/asm/summit/apic.h @@ -14,13 +14,13 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing * Just start on cpu 0. IRQ balancing will spread load */ - return &cpumask_of_cpu(0); + return cpumask_of_cpu(0); } #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -137,14 +137,14 @@ static inline void enable_apic_mode(void) { } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(*cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return (int) 0xFF; @@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(*cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -170,49 +170,6 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = 0xFF; - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return (int) 0xFF; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } -exit: - free_cpumask_var(cpumask); - return apicid; -} - /* cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. 
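/*
 * Illustrative sketch of the cpumask_t "all but self" IPI idiom this
 * patch restores (see the summit/ipi.h hunk just below, and the numaq
 * variant above): take a by-value copy of cpu_online_map, clear the
 * sending CPU, and only raise the IPI if any CPU remains. Assumes the
 * old cpumask_t helpers cpu_clear()/cpus_empty() and a platform
 * send_IPI_mask() implementation.
 */
static inline void sketch_send_IPI_allbutself(int vector)
{
	cpumask_t mask = cpu_online_map;	/* by-value copy, safe to modify */

	cpu_clear(smp_processor_id(), mask);	/* never IPI ourselves */
	if (!cpus_empty(mask))
		send_IPI_mask(mask, vector);
}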
diff --git a/trunk/arch/x86/include/asm/summit/ipi.h b/trunk/arch/x86/include/asm/summit/ipi.h index a8a2c24f50cc..53bd1e7bd7b4 100644 --- a/trunk/arch/x86/include/asm/summit/ipi.h +++ b/trunk/arch/x86/include/asm/summit/ipi.h @@ -1,10 +1,9 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -15,12 +14,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/trunk/arch/x86/include/asm/sys_ia32.h b/trunk/arch/x86/include/asm/sys_ia32.h deleted file mode 100644 index ffb08be2a530..000000000000 --- a/trunk/arch/x86/include/asm/sys_ia32.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * sys_ia32.h - Linux ia32 syscall interfaces - * - * Copyright (c) 2008 Jaswinder Singh Rajput - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#ifndef _ASM_X86_SYS_IA32_H -#define _ASM_X86_SYS_IA32_H - -#include -#include -#include -#include -#include -#include - -/* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); -asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); - -asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); -asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); -asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, char __user *, - struct stat64 __user *, int); -struct mmap_arg_struct; -asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); -asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); - -asmlinkage long sys32_pipe(int __user *); -struct sigaction32; -struct old_sigaction32; -asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, - struct sigaction32 __user *, unsigned int); -asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, - struct old_sigaction32 __user *); -asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, - compat_sigset_t __user *, unsigned int); -asmlinkage long sys32_alarm(unsigned int); - -struct sel_arg_struct; -asmlinkage long sys32_old_select(struct sel_arg_struct __user *); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); -asmlinkage long sys32_sysfs(int, u32, u32); - -asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, - struct compat_timespec __user *); -asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); -asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); - -#ifdef CONFIG_SYSCTL_SYSCALL -struct sysctl_ia32; -asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); -#endif - -asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); - -asmlinkage long sys32_personality(unsigned long); -asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); - -asmlinkage long 
sys32_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); - -struct oldold_utsname; -struct old_utsname; -asmlinkage long sys32_olduname(struct oldold_utsname __user *); -long sys32_uname(struct old_utsname __user *); - -long sys32_ustat(unsigned, struct ustat32 __user *); - -asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, - compat_uptr_t __user *, struct pt_regs *); -asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); - -long sys32_lseek(unsigned int, int, unsigned int); -long sys32_kill(int, int); -long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); -long sys32_vm86_warning(void); -long sys32_lookup_dcookie(u32, u32, char __user *, size_t); - -asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); -asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, - unsigned, unsigned, int); -asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); -asmlinkage long sys32_fallocate(int, int, unsigned, - unsigned, unsigned, unsigned); - -/* ia32/ia32_signal.c */ -asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); -asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *, - stack_ia32_t __user *, struct pt_regs *); -asmlinkage long sys32_sigreturn(struct pt_regs *); -asmlinkage long sys32_rt_sigreturn(struct pt_regs *); - -/* ia32/ipc32.c */ -asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); -#endif /* _ASM_X86_SYS_IA32_H */ diff --git a/trunk/arch/x86/include/asm/topology.h b/trunk/arch/x86/include/asm/topology.h index 79e31e9dcdda..ff386ff50ed7 100644 --- a/trunk/arch/x86/include/asm/topology.h +++ b/trunk/arch/x86/include/asm/topology.h @@ -226,8 +226,6 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) /* indicates that pointers to the topology cpumask_t maps are valid */ #define arch_provides_topology_pointers yes diff --git a/trunk/arch/x86/include/asm/uv/uv_bau.h b/trunk/arch/x86/include/asm/uv/uv_bau.h index 50423c7b56b2..e2363253bbbf 100644 --- a/trunk/arch/x86/include/asm/uv/uv_bau.h +++ b/trunk/arch/x86/include/asm/uv/uv_bau.h @@ -133,61 +133,61 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. 
*/ struct bau_msg_header { - unsigned int dest_subnodeid:6; /* must be zero */ + int dest_subnodeid:6; /* must be zero */ /* bits 5:0 */ - unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ - /* bits 20:6 */ /* first bit in node_map */ - unsigned int command:8; /* message type */ + int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ + /* bits 20:6 */ + int command:8; /* message type */ /* bits 28:21 */ /* 0x38: SN3net EndPoint Message */ - unsigned int rsvd_1:3; /* must be zero */ + int rsvd_1:3; /* must be zero */ /* bits 31:29 */ /* int will align on 32 bits */ - unsigned int rsvd_2:9; /* must be zero */ + int rsvd_2:9; /* must be zero */ /* bits 40:32 */ /* Suppl_A is 56-41 */ - unsigned int payload_2a:8;/* becomes byte 16 of msg */ + int payload_2a:8; /* becomes byte 16 of msg */ /* bits 48:41 */ /* not currently using */ - unsigned int payload_2b:8;/* becomes byte 17 of msg */ + int payload_2b:8; /* becomes byte 17 of msg */ /* bits 56:49 */ /* not currently using */ /* Address field (96:57) is never used as an address (these are address bits 42:3) */ - unsigned int rsvd_3:1; /* must be zero */ + int rsvd_3:1; /* must be zero */ /* bit 57 */ /* address bits 27:4 are payload */ /* these 24 bits become bytes 12-14 of msg */ - unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ + int replied_to:1; /* sent as 0 by the source to byte 12 */ /* bit 58 */ - unsigned int payload_1a:5;/* not currently used */ + int payload_1a:5; /* not currently used */ /* bits 63:59 */ - unsigned int payload_1b:8;/* not currently used */ + int payload_1b:8; /* not currently used */ /* bits 71:64 */ - unsigned int payload_1c:8;/* not currently used */ + int payload_1c:8; /* not currently used */ /* bits 79:72 */ - unsigned int payload_1d:2;/* not currently used */ + int payload_1d:2; /* not currently used */ /* bits 81:80 */ - unsigned int rsvd_4:7; /* must be zero */ + int rsvd_4:7; /* must be zero */ /* bits 88:82 */ - unsigned int sw_ack_flag:1;/* software acknowledge flag */ + int sw_ack_flag:1; /* software acknowledge flag */ /* bit 89 */ /* INTD trasactions at destination are to wait for software acknowledge */ - unsigned int rsvd_5:6; /* must be zero */ + int rsvd_5:6; /* must be zero */ /* bits 95:90 */ - unsigned int rsvd_6:5; /* must be zero */ + int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ + int int_both:1; /* if 1, interrupt both sockets on the blade */ /* bit 101*/ - unsigned int fairness:3;/* usually zero */ + int fairness:3; /* usually zero */ /* bits 104:102 */ - unsigned int multilevel:1; /* multi-level multicast format */ + int multilevel:1; /* multi-level multicast format */ /* bit 105 */ /* 0 for TLB: endpoint multi-unicast messages */ - unsigned int chaining:1;/* next descriptor is part of this activation*/ + int chaining:1; /* next descriptor is part of this activation*/ /* bit 106 */ - unsigned int rsvd_7:21; /* must be zero */ + int rsvd_7:21; /* must be zero */ /* bits 127:107 */ }; diff --git a/trunk/arch/x86/include/asm/virtext.h b/trunk/arch/x86/include/asm/virtext.h deleted file mode 100644 index 593636275238..000000000000 --- a/trunk/arch/x86/include/asm/virtext.h +++ /dev/null @@ -1,132 +0,0 @@ -/* CPU virtualization extensions handling - * - * This should carry the code for handling CPU virtualization extensions - * that needs to live in the kernel core. - * - * Author: Eduardo Habkost - * - * Copyright (C) 2008, Red Hat Inc. 
- * - * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ -#ifndef _ASM_X86_VIRTEX_H -#define _ASM_X86_VIRTEX_H - -#include -#include - -#include -#include - -/* - * VMX functions: - */ - -static inline int cpu_has_vmx(void) -{ - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ -} - - -/** Disable VMX on the current CPU - * - * vmxoff causes a undefined-opcode exception if vmxon was not run - * on the CPU previously. Only call this function if you know VMX - * is enabled. - */ -static inline void cpu_vmxoff(void) -{ - asm volatile (ASM_VMX_VMXOFF : : : "cc"); - write_cr4(read_cr4() & ~X86_CR4_VMXE); -} - -static inline int cpu_vmx_enabled(void) -{ - return read_cr4() & X86_CR4_VMXE; -} - -/** Disable VMX if it is enabled on the current CPU - * - * You shouldn't call this if cpu_has_vmx() returns 0. - */ -static inline void __cpu_emergency_vmxoff(void) -{ - if (cpu_vmx_enabled()) - cpu_vmxoff(); -} - -/** Disable VMX if it is supported and enabled on the current CPU - */ -static inline void cpu_emergency_vmxoff(void) -{ - if (cpu_has_vmx()) - __cpu_emergency_vmxoff(); -} - - - - -/* - * SVM functions: - */ - -/** Check if the CPU has SVM support - * - * You can use the 'msg' arg to get a message describing the problem, - * if the function returns zero. Simply pass NULL if you are not interested - * on the messages; gcc should take care of not generating code for - * the messages on this case. - */ -static inline int cpu_has_svm(const char **msg) -{ - uint32_t eax, ebx, ecx, edx; - - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - if (msg) - *msg = "not amd"; - return 0; - } - - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - if (eax < SVM_CPUID_FUNC) { - if (msg) - *msg = "can't execute cpuid_8000000a"; - return 0; - } - - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { - if (msg) - *msg = "svm not available"; - return 0; - } - return 1; -} - - -/** Disable SVM on the current CPU - * - * You should call this only if cpu_has_svm() returned true. - */ -static inline void cpu_svm_disable(void) -{ - uint64_t efer; - - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); -} - -/** Makes sure SVM is disabled, if it is supported on the CPU - */ -static inline void cpu_emergency_svm_disable(void) -{ - if (cpu_has_svm(NULL)) - cpu_svm_disable(); -} - -#endif /* _ASM_X86_VIRTEX_H */ diff --git a/trunk/arch/x86/kernel/amd_iommu.c b/trunk/arch/x86/kernel/amd_iommu.c index 658e29e0f49b..2e2da717b350 100644 --- a/trunk/arch/x86/kernel/amd_iommu.c +++ b/trunk/arch/x86/kernel/amd_iommu.c @@ -1296,7 +1296,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) * we don't need to preallocate the protection domains anymore. * For now we have to. 
*/ -static void prealloc_protection_domains(void) +void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; diff --git a/trunk/arch/x86/kernel/amd_iommu_init.c b/trunk/arch/x86/kernel/amd_iommu_init.c index fb85e8d466cc..c625800c55ca 100644 --- a/trunk/arch/x86/kernel/amd_iommu_init.c +++ b/trunk/arch/x86/kernel/amd_iommu_init.c @@ -243,7 +243,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) } /* Function to enable the hardware */ -static void __init iommu_enable(struct amd_iommu *iommu) +void __init iommu_enable(struct amd_iommu *iommu) { printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " "at %02x:%02x.%x cap 0x%hx\n", @@ -256,7 +256,7 @@ static void __init iommu_enable(struct amd_iommu *iommu) } /* Function to enable IOMMU event logging and event interrupts */ -static void __init iommu_enable_event_logging(struct amd_iommu *iommu) +void __init iommu_enable_event_logging(struct amd_iommu *iommu) { iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); diff --git a/trunk/arch/x86/kernel/apic.c b/trunk/arch/x86/kernel/apic.c index d652515e2855..b5229affb953 100644 --- a/trunk/arch/x86/kernel/apic.c +++ b/trunk/arch/x86/kernel/apic.c @@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer); #ifdef HAVE_X2APIC int x2apic; /* x2apic enabled before OS handover */ -static int x2apic_preenabled; -static int disable_x2apic; +int x2apic_preenabled; +int disable_x2apic; static __init int setup_nox2apic(char *str) { disable_x2apic = 1; @@ -119,6 +119,8 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); int first_system_vector = 0xfe; +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Debug level, exported for io_apic.c */ @@ -140,7 +142,7 @@ static int lapic_next_event(unsigned long delta, struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); -static void lapic_timer_broadcast(const cpumask_t *mask); +static void lapic_timer_broadcast(cpumask_t mask); static void apic_pm_activate(void); /* @@ -226,7 +228,7 @@ void xapic_icr_write(u32 low, u32 id) apic_write(APIC_ICR, low); } -static u64 xapic_icr_read(void) +u64 xapic_icr_read(void) { u32 icr1, icr2; @@ -266,7 +268,7 @@ void x2apic_icr_write(u32 low, u32 id) wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); } -static u64 x2apic_icr_read(void) +u64 x2apic_icr_read(void) { unsigned long val; @@ -453,7 +455,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, /* * Local APIC timer broadcast function */ -static void lapic_timer_broadcast(const cpumask_t *mask) +static void lapic_timer_broadcast(cpumask_t mask) { #ifdef CONFIG_SMP send_IPI_mask(mask, LOCAL_TIMER_VECTOR); @@ -469,7 +471,7 @@ static void __cpuinit setup_APIC_timer(void) struct clock_event_device *levt = &__get_cpu_var(lapic_events); memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of(smp_processor_id()); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); clockevents_register_device(levt); } @@ -1805,32 +1807,28 @@ void disconnect_bsp_APIC(int virt_wire_setup) void __cpuinit generic_processor_info(int apicid, int version) { int cpu; + cpumask_t tmp_map; /* * Validate version */ if (version == 0x0) { pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); + "fixing up to 0x10. 
(tell your hw vendor)\n", + version); version = 0x10; } apic_version[apicid] = version; - if (num_processors >= nr_cpu_ids) { - int max = nr_cpu_ids; - int thiscpu = max + disabled_cpus; - - pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." - " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; + if (num_processors >= NR_CPUS) { + pr_warning("WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); return; } num_processors++; - cpu = cpumask_next_zero(-1, cpu_present_mask); + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { @@ -1880,8 +1878,8 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif - set_cpu_possible(cpu, true); - set_cpu_present(cpu, true); + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); } #ifdef CONFIG_X86_64 @@ -2083,7 +2081,7 @@ __cpuinit int apic_is_clustered_box(void) bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { /* are we being called early in kernel startup? */ if (bios_cpu_apicid) { id = bios_cpu_apicid[i]; diff --git a/trunk/arch/x86/kernel/bios_uv.c b/trunk/arch/x86/kernel/bios_uv.c index f63882728d91..2a0a2a3cac26 100644 --- a/trunk/arch/x86/kernel/bios_uv.c +++ b/trunk/arch/x86/kernel/bios_uv.c @@ -25,7 +25,7 @@ #include #include -static struct uv_systab uv_systab; +struct uv_systab uv_systab; s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { diff --git a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c index c6ecda64f5f1..68b5d8681cbb 100644 --- a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -534,16 +534,31 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) per_cpu(cpuid4_info, cpu) = NULL; } -static void get_cpu_leaves(void *_retval) +static int __cpuinit detect_cache_attributes(unsigned int cpu) { - int j, *retval = _retval, cpu = smp_processor_id(); + struct _cpuid4_info *this_leaf; + unsigned long j; + int retval; + cpumask_t oldmask; + + if (num_cache_leaves == 0) + return -ENOENT; + + per_cpu(cpuid4_info, cpu) = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (per_cpu(cpuid4_info, cpu) == NULL) + return -ENOMEM; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + if (retval) + goto out; /* Do cpuid and store the results */ for (j = 0; j < num_cache_leaves; j++) { - struct _cpuid4_info *this_leaf; this_leaf = CPUID4_INFO_IDX(cpu, j); - *retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(*retval < 0)) { + retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(retval < 0)) { int i; for (i = 0; i < j; i++) @@ -552,21 +567,9 @@ static void get_cpu_leaves(void *_retval) } cache_shared_cpu_map_setup(cpu, j); } -} - -static int __cpuinit detect_cache_attributes(unsigned int cpu) -{ - int retval; - - if (num_cache_leaves == 0) - return -ENOENT; - - per_cpu(cpuid4_info, cpu) = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(cpuid4_info, cpu) == NULL) - return -ENOMEM; + set_cpus_allowed_ptr(current, &oldmask); - smp_call_function_single(cpu, get_cpu_leaves, &retval, true); +out: if (retval) { kfree(per_cpu(cpuid4_info, cpu)); per_cpu(cpuid4_info, cpu) = NULL; @@ -623,8 +626,8 
@@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, cpumask_t *mask = &this_leaf->shared_cpu_map; n = type? - cpulist_scnprintf(buf, len-2, mask) : - cpumask_scnprintf(buf, len-2, mask); + cpulist_scnprintf(buf, len-2, *mask): + cpumask_scnprintf(buf, len-2, *mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index a5a5e0530370..748c8f9e7a05 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -83,41 +83,34 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ * CPU Initialization */ -struct thresh_restart { - struct threshold_block *b; - int reset; - u16 old_limit; -}; - /* must be called with correct cpu affinity */ -static long threshold_restart_bank(void *_tr) +static void threshold_restart_bank(struct threshold_block *b, + int reset, u16 old_limit) { - struct thresh_restart *tr = _tr; u32 mci_misc_hi, mci_misc_lo; - rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + rdmsr(b->address, mci_misc_lo, mci_misc_hi); - if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ + if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + reset = 1; /* limit cannot be lower than err count */ - if (tr->reset) { /* reset err count and overflow bit */ + if (reset) { /* reset err count and overflow bit */ mci_misc_hi = (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); - } else if (tr->old_limit) { /* change limit w/o reset */ + (THRESHOLD_MAX - b->threshold_limit); + } else if (old_limit) { /* change limit w/o reset */ int new_count = (mci_misc_hi & THRESHOLD_MAX) + - (tr->old_limit - tr->b->threshold_limit); + (old_limit - b->threshold_limit); mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } - tr->b->interrupt_enable ? + b->interrupt_enable ? 
(mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : (mci_misc_hi &= ~MASK_INT_TYPE_HI); mci_misc_hi |= MASK_COUNT_EN_HI; - wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); - return 0; + wrmsr(b->address, mci_misc_lo, mci_misc_hi); } /* cpu init entry point, called from mce.c with preempt off */ @@ -127,7 +120,6 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u8 lvt_off; u32 low = 0, high = 0, address = 0; - struct thresh_restart tr; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -170,10 +162,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) wrmsr(address, low, high); threshold_defaults.address = address; - tr.b = &threshold_defaults; - tr.reset = 0; - tr.old_limit = 0; - threshold_restart_bank(&tr); + threshold_restart_bank(&threshold_defaults, 0, 0); } } } @@ -262,6 +251,20 @@ struct threshold_attr { ssize_t(*store) (struct threshold_block *, const char *, size_t count); }; +static void affinity_set(unsigned int cpu, cpumask_t *oldmask, + cpumask_t *newmask) +{ + *oldmask = current->cpus_allowed; + cpus_clear(*newmask); + cpu_set(cpu, *newmask); + set_cpus_allowed_ptr(current, newmask); +} + +static void affinity_restore(const cpumask_t *oldmask) +{ + set_cpus_allowed_ptr(current, oldmask); +} + #define SHOW_FIELDS(name) \ static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ { \ @@ -274,16 +277,15 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { char *end; - struct thresh_restart tr; + cpumask_t oldmask, newmask; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; b->interrupt_enable = !!new; - tr.b = b; - tr.reset = 0; - tr.old_limit = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 0, 0); + affinity_restore(&oldmask); return end - buf; } @@ -292,7 +294,8 @@ static ssize_t store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { char *end; - struct thresh_restart tr; + cpumask_t oldmask, newmask; + u16 old; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; @@ -300,36 +303,34 @@ static ssize_t store_threshold_limit(struct threshold_block *b, new = THRESHOLD_MAX; if (new < 1) new = 1; - tr.old_limit = b->threshold_limit; + old = b->threshold_limit; b->threshold_limit = new; - tr.b = b; - tr.reset = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 0, old); + affinity_restore(&oldmask); return end - buf; } -static long local_error_count(void *_b) -{ - struct threshold_block *b = _b; - u32 low, high; - - rdmsr(b->address, low, high); - return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); -} - static ssize_t show_error_count(struct threshold_block *b, char *buf) { - return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); + u32 high, low; + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); + rdmsr(b->address, low, high); + affinity_restore(&oldmask); + return sprintf(buf, "%x\n", + (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); } static ssize_t store_error_count(struct threshold_block *b, const char *buf, size_t count) { - struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; - - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, 
&newmask); + threshold_restart_bank(b, 1, 0); + affinity_restore(&oldmask); return 1; } @@ -462,19 +463,12 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, return err; } -static long local_allocate_threshold_blocks(void *_bank) -{ - unsigned int *bank = _bank; - - return allocate_threshold_blocks(smp_processor_id(), *bank, 0, - MSR_IA32_MC0_MISC + *bank * 4); -} - /* symlinks sibling shared banks to first core. first core owns dir/files. */ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; + cpumask_t oldmask, newmask; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -525,7 +519,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); + affinity_set(cpu, &oldmask, &newmask); + err = allocate_threshold_blocks(cpu, bank, 0, + MSR_IA32_MC0_MISC + bank * 4); + affinity_restore(&oldmask); + if (err) goto out_free; diff --git a/trunk/arch/x86/kernel/cpu/mtrr/generic.c b/trunk/arch/x86/kernel/cpu/mtrr/generic.c index b59ddcc88cd8..4e8d77f01eeb 100644 --- a/trunk/arch/x86/kernel/cpu/mtrr/generic.c +++ b/trunk/arch/x86/kernel/cpu/mtrr/generic.c @@ -14,6 +14,14 @@ #include #include "mtrr.h" +struct mtrr_state { + struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; + mtrr_type fixed_ranges[NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + struct fixed_range_block { int base_msr; /* start address of an MTRR block */ int ranges; /* number of MTRRs in this block */ @@ -27,12 +35,10 @@ static struct fixed_range_block fixed_range_blocks[] = { }; static unsigned long smp_changes_mask; +static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; u64 mtrr_tom2; -struct mtrr_state_type mtrr_state = {}; -EXPORT_SYMBOL_GPL(mtrr_state); - #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." 
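/*
 * Illustrative sketch of how the fixed_range_blocks[] table in the
 * generic.c hunk above is meant to be walked: each block names the
 * first MSR of a run of fixed-range MTRR MSRs plus the run length.
 * The zero-terminated table layout and the rdmsr() usage are
 * assumptions made for this example only.
 */
static void sketch_walk_fixed_ranges(const struct fixed_range_block *blocks)
{
	const struct fixed_range_block *block;
	u32 lo, hi;
	int i;

	for (block = blocks; block->ranges; block++)	/* a zeroed entry ends the table */
		for (i = 0; i < block->ranges; i++)
			rdmsr(block->base_msr + i, lo, hi);	/* one fixed-range MTRR MSR */
}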
diff --git a/trunk/arch/x86/kernel/cpu/mtrr/main.c b/trunk/arch/x86/kernel/cpu/mtrr/main.c index d259e5d2e054..1159e269e596 100644 --- a/trunk/arch/x86/kernel/cpu/mtrr/main.c +++ b/trunk/arch/x86/kernel/cpu/mtrr/main.c @@ -49,7 +49,7 @@ u32 num_var_ranges = 0; -unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; +unsigned int mtrr_usage_table[MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; @@ -574,7 +574,7 @@ struct mtrr_value { unsigned long lsize; }; -static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; +static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { @@ -824,14 +824,16 @@ static int enable_mtrr_cleanup __initdata = static int __init disable_mtrr_cleanup_setup(char *str) { - enable_mtrr_cleanup = 0; + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; return 0; } early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); static int __init enable_mtrr_cleanup_setup(char *str) { - enable_mtrr_cleanup = 1; + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; return 0; } early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); diff --git a/trunk/arch/x86/kernel/cpu/mtrr/mtrr.h b/trunk/arch/x86/kernel/cpu/mtrr/mtrr.h index ffd60409cc6d..2dc4ec656b23 100644 --- a/trunk/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/trunk/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -8,6 +8,11 @@ #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + +#define NUM_FIXED_RANGES 88 +#define MAX_VAR_RANGES 256 #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -24,7 +29,11 @@ #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 -extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; struct mtrr_ops { u32 vendor; @@ -61,6 +70,13 @@ struct set_mtrr_context { u32 ccr3; }; +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); diff --git a/trunk/arch/x86/kernel/cpuid.c b/trunk/arch/x86/kernel/cpuid.c index 85d28d53f5d3..72cefd1e649b 100644 --- a/trunk/arch/x86/kernel/cpuid.c +++ b/trunk/arch/x86/kernel/cpuid.c @@ -39,10 +39,10 @@ #include #include #include -#include #include #include +#include #include static struct class *cpuid_class; @@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) } static ssize_t cpuid_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t * ppos) { char __user *tmp = buf; struct cpuid_regs cmd; @@ -117,7 +117,7 @@ static int cpuid_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; int ret = 0; - + lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); diff --git a/trunk/arch/x86/kernel/crash.c b/trunk/arch/x86/kernel/crash.c index c689d19e35ab..d84a852e4cd7 100644 --- a/trunk/arch/x86/kernel/crash.c +++ b/trunk/arch/x86/kernel/crash.c @@ -26,7 +26,6 @@ #include #include #include -#include #include @@ -50,15 +49,6 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) #endif 
 	crash_save_cpu(regs, cpu);
-	/* Disable VMX or SVM if needed.
-	 *
-	 * We need to disable virtualization on all CPUs.
-	 * Having VMX or SVM enabled on any CPU may break rebooting
-	 * after the kdump kernel has finished its task.
-	 */
-	cpu_emergency_vmxoff();
-	cpu_emergency_svm_disable();
-
 	disable_local_APIC();
 }
@@ -90,14 +80,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	local_irq_disable();
 	kdump_nmi_shootdown_cpus();
-
-	/* Booting kdump kernel with VMX or SVM enabled won't work,
-	 * because (among other limitations) we can't disable paging
-	 * with the virt flags.
-	 */
-	cpu_emergency_vmxoff();
-	cpu_emergency_svm_disable();
-
 	lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
 	disable_IO_APIC();
diff --git a/trunk/arch/x86/kernel/early_printk.c b/trunk/arch/x86/kernel/early_printk.c
index 504ad198e4ad..23b138e31e9c 100644
--- a/trunk/arch/x86/kernel/early_printk.c
+++ b/trunk/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
 	va_list ap;
 	va_start(ap, fmt);
-	n = vscnprintf(buf, sizeof(buf), fmt, ap);
+	n = vscnprintf(buf, 512, fmt, ap);
 	early_console->write(early_console, buf, n);
 	va_end(ap);
 }
diff --git a/trunk/arch/x86/kernel/genapic_flat_64.c b/trunk/arch/x86/kernel/genapic_flat_64.c
index 34185488e4fb..c0262791bda4 100644
--- a/trunk/arch/x86/kernel/genapic_flat_64.c
+++ b/trunk/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
-static const struct cpumask *flat_target_cpus(void)
+static cpumask_t flat_target_cpus(void)
 {
-	return cpu_online_mask;
+	return cpu_online_map;
 }
-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static cpumask_t flat_vector_allocation_domain(int cpu)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt destination.
*/ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } /* @@ -69,8 +69,9 @@ static void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void _flat_send_IPI_mask(unsigned long mask, int vector) +static void flat_send_IPI_mask(cpumask_t cpumask, int vector) { + unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); @@ -78,41 +79,20 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) local_irq_restore(flags); } -static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - - _flat_send_IPI_mask(mask, vector); -} - -static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - int cpu = smp_processor_id(); - - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); - _flat_send_IPI_mask(mask, vector); -} - static void flat_send_IPI_allbutself(int vector) { - int cpu = smp_processor_id(); #ifdef CONFIG_HOTPLUG_CPU int hotplug = 1; #else int hotplug = 0; #endif if (hotplug || vector == NMI_VECTOR) { - if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { - unsigned long mask = cpumask_bits(cpu_online_mask)[0]; + cpumask_t allbutme = cpu_online_map; - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); + cpu_clear(smp_processor_id(), allbutme); - _flat_send_IPI_mask(mask, vector); - } + if (!cpus_empty(allbutme)) + flat_send_IPI_mask(allbutme, vector); } else if (num_online_cpus() > 1) { __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); } @@ -121,7 +101,7 @@ static void flat_send_IPI_allbutself(int vector) static void flat_send_IPI_all(int vector) { if (vector == NMI_VECTOR) - flat_send_IPI_mask(cpu_online_mask, vector); + flat_send_IPI_mask(cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } @@ -155,18 +135,9 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; -} - -static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) { - unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; - unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; - - return mask1 & mask2; + return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; } static unsigned int phys_pkg_id(int index_msb) @@ -186,10 +157,8 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, @@ -219,39 +188,35 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static const struct cpumask *physflat_target_cpus(void) +static cpumask_t physflat_target_cpus(void) { - return cpu_online_mask; + return cpu_online_map; } -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t physflat_vector_allocation_domain(int cpu) { - 
cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + return cpumask_of_cpu(cpu); } -static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) +static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); } -static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - send_IPI_mask_allbutself(cpumask, vector); -} - static void physflat_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t allbutme = cpu_online_map; + + cpu_clear(smp_processor_id(), allbutme); + physflat_send_IPI_mask(allbutme, vector); } static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_mask, vector); + physflat_send_IPI_mask(cpu_online_map, vector); } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -259,31 +224,13 @@ static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - struct genapic apic_physflat = { .name = "physical flat", .acpi_madt_oem_check = physflat_acpi_madt_oem_check, @@ -296,10 +243,8 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_cluster.c b/trunk/arch/x86/kernel/genx2apic_cluster.c index 6ce497cc372d..f6a2c8eb48a6 100644 --- a/trunk/arch/x86/kernel/genx2apic_cluster.c +++ b/trunk/arch/x86/kernel/genx2apic_cluster.c @@ -22,18 +22,19 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *x2apic_target_cpus(void) +static cpumask_t x2apic_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } /* * for now each logical cpu is in its own vector allocation domain. */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t x2apic_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -55,53 +56,32 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, * at once. We have 16 cpu's in a cluster. This will minimize IPI register * writes. 
*/ -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); -} - -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } local_irq_restore(flags); } static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + x2apic_send_IPI_mask(cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -109,38 +89,21 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. + * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. 
- */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -187,10 +150,8 @@ struct genapic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_phys.c b/trunk/arch/x86/kernel/genx2apic_phys.c index 21bcc0e098ba..d042211768b7 100644 --- a/trunk/arch/x86/kernel/genx2apic_phys.c +++ b/trunk/arch/x86/kernel/genx2apic_phys.c @@ -29,15 +29,16 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *x2apic_target_cpus(void) +static cpumask_t x2apic_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t x2apic_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -53,54 +54,32 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, x2apic_icr_write(cfg, apicid); } -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu_mask(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + x2apic_send_IPI_mask(cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -108,7 +87,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -116,30 +95,13 @@ static unsigned int 
x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -161,12 +123,12 @@ static unsigned int phys_pkg_id(int index_msb) return current_cpu_data.initial_apicid >> index_msb; } -static void x2apic_send_IPI_self(int vector) +void x2apic_send_IPI_self(int vector) { apic_write(APIC_SELF_IPI, vector); } -static void init_x2apic_ldr(void) +void init_x2apic_ldr(void) { return; } @@ -183,10 +145,8 @@ struct genapic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_uv_x.c b/trunk/arch/x86/kernel/genx2apic_uv_x.c index b193e082f6ce..dece17289731 100644 --- a/trunk/arch/x86/kernel/genx2apic_uv_x.c +++ b/trunk/arch/x86/kernel/genx2apic_uv_x.c @@ -79,15 +79,16 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ -static const struct cpumask *uv_target_cpus(void) +static cpumask_t uv_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t uv_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) @@ -126,37 +127,28 @@ static void uv_send_IPI_one(int cpu, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -static void uv_send_IPI_mask(const struct cpumask *mask, int vector) +static void uv_send_IPI_mask(cpumask_t mask, int vector) { unsigned int cpu; - for_each_cpu(cpu, mask) - uv_send_IPI_one(cpu, vector); -} - -static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned int cpu; - unsigned int this_cpu = smp_processor_id(); - - for_each_cpu(cpu, mask) - if (cpu != this_cpu) + for_each_possible_cpu(cpu) + if (cpu_isset(cpu, mask)) uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_allbutself(int vector) { - unsigned int cpu; - unsigned int this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - for_each_online_cpu(cpu) - if (cpu != this_cpu) - uv_send_IPI_one(cpu, vector); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + uv_send_IPI_mask(mask, vector); } static void uv_send_IPI_all(int vector) { - uv_send_IPI_mask(cpu_online_mask, vector); + uv_send_IPI_mask(cpu_online_map, vector); } static int uv_apic_id_registered(void) @@ -168,7 +160,7 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -176,30 +168,13 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. 
- */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -247,10 +222,8 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, - .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/head64.c b/trunk/arch/x86/kernel/head64.c index b9a4d8c4b935..388e05a5fc17 100644 --- a/trunk/arch/x86/kernel/head64.c +++ b/trunk/arch/x86/kernel/head64.c @@ -27,7 +27,7 @@ #include /* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda; +static struct x8664_pda _boot_cpu_pda __read_mostly; #ifdef CONFIG_SMP /* diff --git a/trunk/arch/x86/kernel/hpet.c b/trunk/arch/x86/kernel/hpet.c index cd759ad90690..845ea097383e 100644 --- a/trunk/arch/x86/kernel/hpet.c +++ b/trunk/arch/x86/kernel/hpet.c @@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void) * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. */ - hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); + hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); clockevents_register_device(&hpet_clockevent); global_clock_event = &hpet_clockevent; printk(KERN_DEBUG "hpet clockevent registered\n"); @@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode, struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); hpet_setup_msi_irq(hdev->irq); disable_irq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); + irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); enable_irq(hdev->irq); } break; @@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) return -1; disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); + irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); enable_irq(dev->irq); printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", @@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) /* 5 usec minimum reprogramming delta. */ evt->min_delta_ns = 5000; - evt->cpumask = cpumask_of(hdev->cpu); + evt->cpumask = cpumask_of_cpu(hdev->cpu); clockevents_register_device(evt); } diff --git a/trunk/arch/x86/kernel/i8253.c b/trunk/arch/x86/kernel/i8253.c index 10f92fb532f3..c1b5e3ece1f2 100644 --- a/trunk/arch/x86/kernel/i8253.c +++ b/trunk/arch/x86/kernel/i8253.c @@ -114,7 +114,7 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. 
*/ - pit_clockevent.cpumask = cpumask_of(smp_processor_id()); + pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_clockevent.shift); pit_clockevent.max_delta_ns = diff --git a/trunk/arch/x86/kernel/io_apic.c b/trunk/arch/x86/kernel/io_apic.c index 69911722b9d3..74917658b004 100644 --- a/trunk/arch/x86/kernel/io_apic.c +++ b/trunk/arch/x86/kernel/io_apic.c @@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) struct irq_cfg { struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; + cpumask_t domain; + cpumask_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; @@ -152,22 +152,22 @@ static struct irq_cfg irq_cfgx[] = { #else static struct irq_cfg irq_cfgx[NR_IRQS] = { #endif - [0] = { .vector = IRQ0_VECTOR, }, - [1] = { .vector = IRQ1_VECTOR, }, - [2] = { .vector = IRQ2_VECTOR, }, - [3] = { .vector = IRQ3_VECTOR, }, - [4] = { .vector = IRQ4_VECTOR, }, - [5] = { .vector = IRQ5_VECTOR, }, - [6] = { .vector = IRQ6_VECTOR, }, - [7] = { .vector = IRQ7_VECTOR, }, - [8] = { .vector = IRQ8_VECTOR, }, - [9] = { .vector = IRQ9_VECTOR, }, - [10] = { .vector = IRQ10_VECTOR, }, - [11] = { .vector = IRQ11_VECTOR, }, - [12] = { .vector = IRQ12_VECTOR, }, - [13] = { .vector = IRQ13_VECTOR, }, - [14] = { .vector = IRQ14_VECTOR, }, - [15] = { .vector = IRQ15_VECTOR, }, + [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; int __init arch_early_irq_init(void) @@ -183,10 +183,6 @@ int __init arch_early_irq_init(void) for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; - alloc_bootmem_cpumask_var(&cfg[i].domain); - alloc_bootmem_cpumask_var(&cfg[i].old_domain); - if (i < NR_IRQS_LEGACY) - cpumask_setall(cfg[i].domain); } return 0; @@ -213,20 +209,6 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); - if (cfg) { - /* FIXME: needs alloc_cpumask_var_node() */ - if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) { - kfree(cfg); - cfg = NULL; - } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) { - free_cpumask_var(cfg->domain); - kfree(cfg); - cfg = NULL; - } else { - cpumask_clear(cfg->domain); - cpumask_clear(cfg->old_domain); - } - } printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); return cfg; @@ -351,14 +333,13 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) } } -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) +static void set_extra_move_desc(struct 
irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg = desc->chip_data; if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) + if (!cpus_intersects(desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -373,8 +354,7 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) +static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { } #endif @@ -505,26 +485,6 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; @@ -560,55 +520,41 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); -/* - * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid - * of that, or returns BAD_APICID and leaves desc->affinity untouched. - */ -static unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; irq = desc->irq; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; + return; - cpumask_and(&desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); -} - -static void -set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - unsigned int irq; - irq = desc->irq; - cfg = desc->chip_data; + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { - /* Only the high 8 bits are valid. 
*/ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, cfg); - } + __target_IO_APIC_irq(irq, dest, cfg); + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } -static void -set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { struct irq_desc *desc; @@ -706,7 +652,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) } #ifdef CONFIG_X86_64 -static void io_apic_sync(struct irq_pin_list *entry) +void io_apic_sync(struct irq_pin_list *entry) { /* * Synchronize the IO-APIC and the CPU by doing @@ -1276,8 +1222,7 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1292,49 +1237,49 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) */ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; - int cpu, err; - cpumask_var_t tmp_mask; + int cpu; if ((cfg->move_in_progress) || cfg->move_cleanup_count) return -EBUSY; - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); old_vector = cfg->vector; if (old_vector) { - cpumask_and(tmp_mask, mask, cpu_online_mask); - cpumask_and(tmp_mask, cfg->domain, tmp_mask); - if (!cpumask_empty(tmp_mask)) { - free_cpumask_var(tmp_mask); + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) return 0; - } } - /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; - for_each_cpu_and(cpu, mask, cpu_online_mask) { + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; int new_cpu; int vector, offset; - vector_allocation_domain(cpu, tmp_mask); + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); vector = current_vector; offset = current_offset; next: vector += 8; if (vector >= first_system_vector) { - /* If out of vectors on large boxen, must share them. */ + /* If we run out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; - - if (test_bit(vector, used_vectors)) +#ifdef CONFIG_X86_64 + if (vector == IA32_SYSCALL_VECTOR) goto next; - - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif + for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! 
*/ @@ -1342,21 +1287,18 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) current_offset = offset; if (old_vector) { cfg->move_in_progress = 1; - cpumask_copy(cfg->old_domain, cfg->domain); + cfg->old_domain = cfg->domain; } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) + for_each_cpu_mask_nr(new_cpu, new_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); - err = 0; - break; + cfg->domain = domain; + return 0; } - free_cpumask_var(tmp_mask); - return err; + return -ENOSPC; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { int err; unsigned long flags; @@ -1369,20 +1311,23 @@ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { + cpumask_t mask; int cpu, vector; BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; - cpumask_clear(cfg->domain); + cpus_clear(cfg->domain); if (likely(!cfg->move_in_progress)) return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + cpus_and(mask, cfg->old_domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -1405,7 +1350,7 @@ void __setup_vector_irq(int cpu) /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; @@ -1417,7 +1362,7 @@ void __setup_vector_irq(int cpu) continue; cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -1553,17 +1498,18 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de { struct irq_cfg *cfg; struct IO_APIC_route_entry entry; - unsigned int dest; + cpumask_t mask; if (!IO_APIC_IRQ(irq)) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, TARGET_CPUS)) + mask = TARGET_CPUS; + if (assign_irq_vector(irq, cfg, mask)) return; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(mask, cfg->domain, mask); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1573,7 +1519,8 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - dest, trigger, polarity, cfg->vector)) { + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); __clear_irq_vector(irq, cfg); @@ -2293,7 +2240,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2342,17 +2289,18 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to 
IO-APIC RTE. */ -static void -migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; + cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; irq = desc->irq; @@ -2365,7 +2313,8 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) set_extra_move_desc(desc, mask); - dest = cpu_mask_to_apicid_and(cfg->domain, mask); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -2382,10 +2331,14 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) */ modify_irte(irq, &irte); - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } - cpumask_copy(&desc->affinity, mask); + desc->affinity = mask; } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2407,11 +2360,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everything is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpus_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2436,7 +2389,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, &desc->pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2445,20 +2398,18 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. 
*/ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, - const struct cpumask *mask) +static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); + desc->pending_mask = mask; migrate_irq_remapped_level_desc(desc); return; } migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, - const struct cpumask *mask) +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -2493,7 +2444,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (!cfg->move_cleanup_count) goto unlock; - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) goto unlock; __get_cpu_var(vector_irq)[vector] = -1; @@ -2530,14 +2481,20 @@ static void irq_complete_move(struct irq_desc **descp) vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; #endif - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } } #else static inline void irq_complete_move(struct irq_desc **descp) {} @@ -3259,13 +3216,16 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms struct irq_cfg *cfg; int err; unsigned dest; + cpumask_t tmp; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, cfg, tmp); if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3319,18 +3279,26 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return; + + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); read_msi_msg_desc(desc, &msg); @@ -3340,27 +3308,37 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); + desc->affinity = mask; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. 
*/ -static void -ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg = desc->chip_data; + struct irq_cfg *cfg; unsigned int dest; + cpumask_t tmp, cleanup_mask; struct irte irte; + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + if (get_irte(irq, &irte)) return; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3374,8 +3352,14 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) * at the new destination. So, time to cleanup the previous * vector allocation. */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc->affinity = mask; } #endif @@ -3566,18 +3550,26 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return; + + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); dmar_msi_read(irq, &msg); @@ -3587,6 +3579,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -3620,18 +3613,26 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return; + + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); hpet_msi_read(irq, &msg); @@ -3641,6 +3642,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -3695,19 +3697,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, 
mask); - if (dest == BAD_APICID) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return; + + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); + desc->affinity = mask; } #endif @@ -3727,14 +3738,17 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; int err; + cpumask_t tmp; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, cfg, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3770,7 +3784,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset) { - const struct cpumask *eligible_cpu = cpumask_of(cpu); + const cpumask_t *eligible_cpu = get_cpu_mask(cpu); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3780,7 +3794,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, eligible_cpu); + err = assign_irq_vector(irq, cfg, *eligible_cpu); if (err != 0) return err; @@ -3799,7 +3813,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(eligible_cpu); + entry->dest = cpu_mask_to_apicid(*eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -4010,7 +4024,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; struct irq_desc *desc; struct irq_cfg *cfg; - const struct cpumask *mask; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -4041,7 +4055,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; diff --git a/trunk/arch/x86/kernel/ipi.c b/trunk/arch/x86/kernel/ipi.c index 285bbf8831fa..f1c688e46f35 100644 --- a/trunk/arch/x86/kernel/ipi.c +++ b/trunk/arch/x86/kernel/ipi.c @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. 
*/ -void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) { - unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); __send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(const struct cpumask *mask, int vector) +void send_IPI_mask_sequence(cpumask_t mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -139,24 +139,12 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); - local_irq_restore(flags); -} - -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) + for_each_possible_cpu(query_cpu) { + if (cpu_isset(query_cpu, mask)) { __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + } + } local_irq_restore(flags); } diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index bce53e1352a0..3f1d9d18df67 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -9,7 +9,6 @@ #include #include #include -#include atomic_t irq_err_count; @@ -191,5 +190,3 @@ u64 arch_irq_stat(void) #endif return sum; } - -EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/trunk/arch/x86/kernel/irq_32.c b/trunk/arch/x86/kernel/irq_32.c index 9dc5588f336a..119fc9c8ff7f 100644 --- a/trunk/arch/x86/kernel/irq_32.c +++ b/trunk/arch/x86/kernel/irq_32.c @@ -233,28 +233,27 @@ unsigned int do_IRQ(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU #include -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) +void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; + cpumask_t mask; if (!desc) continue; if (irq == 2) continue; - affinity = &desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + cpus_and(mask, desc->affinity, map); + if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; + mask = map; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); + desc->chip->set_affinity(irq, mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/trunk/arch/x86/kernel/irq_64.c b/trunk/arch/x86/kernel/irq_64.c index 6383d50f82ea..a174a217eb1a 100644 --- a/trunk/arch/x86/kernel/irq_64.c +++ b/trunk/arch/x86/kernel/irq_64.c @@ -80,17 +80,16 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. 
*/ -void fixup_irqs(void) +void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { + cpumask_t mask; int break_affinity = 0; int set_affinity = 1; - const struct cpumask *affinity; if (!desc) continue; @@ -100,23 +99,23 @@ void fixup_irqs(void) /* interrupts are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { + cpus_equal(desc->affinity, map)) { spin_unlock(&desc->lock); continue; } - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + cpus_and(mask, desc->affinity, map); + if (cpus_empty(mask)) { break_affinity = 1; - affinity = cpu_all_mask; + mask = map; } if (desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); + desc->chip->set_affinity(irq, mask); else if (!(warned++)) set_affinity = 0; diff --git a/trunk/arch/x86/kernel/irqinit_32.c b/trunk/arch/x86/kernel/irqinit_32.c index 84723295f88a..203384ed2b5d 100644 --- a/trunk/arch/x86/kernel/irqinit_32.c +++ b/trunk/arch/x86/kernel/irqinit_32.c @@ -110,18 +110,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -158,12 +146,10 @@ void __init native_init_IRQ(void) alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* IPI for single call function */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/trunk/arch/x86/kernel/irqinit_64.c b/trunk/arch/x86/kernel/irqinit_64.c index 31ebfe38e96c..6190e6ef546c 100644 --- a/trunk/arch/x86/kernel/irqinit_64.c +++ b/trunk/arch/x86/kernel/irqinit_64.c @@ -69,18 +69,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... 
NR_VECTORS - 1] = -1 }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - void __init init_ISA_irqs(void) { int i; @@ -133,7 +121,6 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif } diff --git a/trunk/arch/x86/kernel/kvmclock.c b/trunk/arch/x86/kernel/kvmclock.c index 652fce6d2cce..e169ae9b6a62 100644 --- a/trunk/arch/x86/kernel/kvmclock.c +++ b/trunk/arch/x86/kernel/kvmclock.c @@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void) */ static unsigned long kvm_get_tsc_khz(void) { - struct pvclock_vcpu_time_info *src; - src = &per_cpu(hv_clock, 0); - return pvclock_tsc_khz(src); + return preset_lpj; } static void kvm_get_preset_lpj(void) { + struct pvclock_vcpu_time_info *src; unsigned long khz; u64 lpj; - khz = kvm_get_tsc_khz(); + src = &per_cpu(hv_clock, 0); + khz = pvclock_tsc_khz(src); lpj = ((u64)khz * 1000); do_div(lpj, HZ); @@ -194,7 +194,5 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register(&kvm_clock); - pv_info.paravirt_enabled = 1; - pv_info.name = "KVM"; } } diff --git a/trunk/arch/x86/kernel/ldt.c b/trunk/arch/x86/kernel/ldt.c index 71f1d99a635d..eee32b43fee3 100644 --- a/trunk/arch/x86/kernel/ldt.c +++ b/trunk/arch/x86/kernel/ldt.c @@ -12,8 +12,8 @@ #include #include #include -#include +#include #include #include #include @@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) if (err < 0) return err; - for (i = 0; i < old->size; i++) + for(i = 0; i < old->size; i++) write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } diff --git a/trunk/arch/x86/kernel/mfgpt_32.c b/trunk/arch/x86/kernel/mfgpt_32.c index c12314c9e86f..3b599518c322 100644 --- a/trunk/arch/x86/kernel/mfgpt_32.c +++ b/trunk/arch/x86/kernel/mfgpt_32.c @@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = { .set_mode = mfgpt_set_mode, .set_next_event = mfgpt_next_event, .rating = 250, - .cpumask = cpu_all_mask, + .cpumask = CPU_MASK_ALL, .shift = 32 }; diff --git a/trunk/arch/x86/kernel/mmconf-fam10h_64.c b/trunk/arch/x86/kernel/mmconf-fam10h_64.c index 666e43df51f9..efc2f361fe85 100644 --- a/trunk/arch/x86/kernel/mmconf-fam10h_64.c +++ b/trunk/arch/x86/kernel/mmconf-fam10h_64.c @@ -13,7 +13,8 @@ #include #include #include -#include + +#include "../pci/pci.h" struct pci_hostbridge_probe { u32 bus; diff --git a/trunk/arch/x86/kernel/mpparse.c b/trunk/arch/x86/kernel/mpparse.c index c5c5b8df1dbc..45e3b69808ba 100644 --- a/trunk/arch/x86/kernel/mpparse.c +++ b/trunk/arch/x86/kernel/mpparse.c @@ -16,14 +16,14 @@ #include #include #include -#include -#include +#include #include #include #include #include #include +#include #include #include #include @@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m) #endif if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) + set_bit(m->mpc_busid, mp_bus_not_pci); +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { @@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) x86_quirks->mpc_oem_pci_bus(m); clear_bit(m->mpc_busid, 
mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; diff --git a/trunk/arch/x86/kernel/nmi.c b/trunk/arch/x86/kernel/nmi.c index 45a09ccdc214..8bd1bf9622a7 100644 --- a/trunk/arch/x86/kernel/nmi.c +++ b/trunk/arch/x86/kernel/nmi.c @@ -26,10 +26,11 @@ #include #include #include -#include #include #include +#include +#include #include #include diff --git a/trunk/arch/x86/kernel/pci-gart_64.c b/trunk/arch/x86/kernel/pci-gart_64.c index 00c2bcd41463..a35eaa379ff6 100644 --- a/trunk/arch/x86/kernel/pci-gart_64.c +++ b/trunk/arch/x86/kernel/pci-gart_64.c @@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */ * to trigger bugs with some popular PCI cards, in particular 3ware (but * has also been seen with Qlogic at least). */ -static int iommu_fullflush = 1; +int iommu_fullflush = 1; /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index bf088c61fa40..61f718df6eec 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include #ifdef CONFIG_X86_32 # include @@ -25,6 +23,7 @@ #include + /* * Power off function, if any */ @@ -40,12 +39,6 @@ int reboot_force; static int reboot_cpu = -1; #endif -/* This is set if we need to go through the 'emergency' path. * When machine_emergency_restart() is called, we may be on * an inconsistent state and won't be able to do a clean cleanup */ -static int reboot_emergency; - /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ bool port_cf9_safe = false; @@ -375,48 +368,6 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct die_args *args) -{ - cpu_emergency_vmxoff(); -} - -/* Use NMIs as IPIs to tell all CPUs to disable virtualization - */ -static void emergency_vmx_disable_all(void) -{ - /* Just make sure we won't change CPUs while doing this */ - local_irq_disable(); - - /* We need to disable VMX on all CPUs before rebooting, otherwise - * we risk hanging up the machine, because the CPU ignore INIT - * signals when VMX is enabled. - * - * We can't take any locks and we may be on an inconsistent - * state, so we use NMIs as IPIs to tell the other CPUs to disable - * VMX and halt. - * - * For safety, we will avoid running the nmi_shootdown_cpus() - * stuff unnecessarily, but we don't have a way to check - * if other CPUs have VMX enabled. So we will call it only if the - * CPU we are running on has VMX enabled. - * - * We will miss cases where VMX is not enabled on all CPUs. This - * shouldn't do much harm because KVM always enable VMX on all - * CPUs anyway. But we can miss it on the small window where KVM - * is still enabling VMX. - */ - if (cpu_has_vmx() && cpu_vmx_enabled()) { - /* Disable VMX on this CPU. 
- */ - cpu_vmxoff(); - - /* Halt and disable VMX on the other CPUs */ - nmi_shootdown_cpus(vmxoff_nmi); - - } -} - - void __attribute__((weak)) mach_reboot_fixups(void) { } @@ -425,9 +376,6 @@ static void native_machine_emergency_restart(void) { int i; - if (reboot_emergency) - emergency_vmx_disable_all(); - /* Tell the BIOS if we want cold or warm reboot */ *((unsigned short *)__va(0x472)) = reboot_mode; @@ -534,19 +482,13 @@ void native_machine_shutdown(void) #endif } -static void __machine_emergency_restart(int emergency) -{ - reboot_emergency = emergency; - machine_ops.emergency_restart(); -} - static void native_machine_restart(char *__unused) { printk("machine restart\n"); if (!reboot_force) machine_shutdown(); - __machine_emergency_restart(0); + machine_emergency_restart(); } static void native_machine_halt(void) @@ -590,7 +532,7 @@ void machine_shutdown(void) void machine_emergency_restart(void) { - __machine_emergency_restart(1); + machine_ops.emergency_restart(); } void machine_restart(char *cmd) @@ -650,7 +592,10 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - send_IPI_allbutself(NMI_VECTOR); + cpumask_t mask = cpu_online_map; + cpu_clear(safe_smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/trunk/arch/x86/kernel/setup_percpu.c b/trunk/arch/x86/kernel/setup_percpu.c index 0b63b08e7530..ae0c0d3bb770 100644 --- a/trunk/arch/x86/kernel/setup_percpu.c +++ b/trunk/arch/x86/kernel/setup_percpu.c @@ -152,11 +152,6 @@ void __init setup_per_cpu_areas(void) old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); size = roundup(old_size, align); - - printk(KERN_INFO - "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", - NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -173,24 +168,24 @@ void __init setup_per_cpu_areas(void) "cpu %d has no node %d or node-local memory\n", cpu, node); if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d at %016lx\n", + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, __pa(MAX_DMA_ADDRESS)); if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d on node%d " - "at %016lx\n", - cpu, node, __pa(ptr)); + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", + NR_CPUS, nr_cpu_ids, nr_node_ids); + /* Setup percpu data maps */ setup_per_cpu_maps(); @@ -287,7 +282,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) else cpu_clear(cpu, *mask); - cpulist_scnprintf(buf, sizeof(buf), mask); + cpulist_scnprintf(buf, sizeof(buf), *mask); printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", enable? 
"numa_add_cpu":"numa_remove_cpu", cpu, node, buf); } diff --git a/trunk/arch/x86/kernel/smp.c b/trunk/arch/x86/kernel/smp.c index beea2649a240..7e558db362c1 100644 --- a/trunk/arch/x86/kernel/smp.c +++ b/trunk/arch/x86/kernel/smp.c @@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); } -void native_send_call_func_ipi(const struct cpumask *mask) +void native_send_call_func_ipi(cpumask_t mask) { cpumask_t allbutself; allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - if (cpus_equal(*mask, allbutself) && + if (cpus_equal(mask, allbutself) && cpus_equal(cpu_online_map, cpu_callout_map)) send_IPI_allbutself(CALL_FUNCTION_VECTOR); else diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index 31869bf5fabd..f8500c969442 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -102,8 +102,14 @@ EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; +/* bitmap of online cpus */ +cpumask_t cpu_online_map __read_mostly; +EXPORT_SYMBOL(cpu_online_map); + cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); @@ -1254,15 +1260,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } -static int __initdata setup_possible_cpus = -1; -static int __init _setup_possible_cpus(char *str) -{ - get_option(&str, &setup_possible_cpus); - return 0; -} -early_param("possible_cpus", _setup_possible_cpus); - - /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -1275,7 +1272,7 @@ early_param("possible_cpus", _setup_possible_cpus); * * Three ways to find out the number of additional hotplug CPUs: * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM + * - The user can overwrite it with additional_cpus=NUM * - Otherwise don't reserve additional CPUs. * We do this because additional CPUs waste a lot of memory. 
* -AK @@ -1288,17 +1285,9 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; - if (setup_possible_cpus == -1) - possible = num_processors + disabled_cpus; - else - possible = setup_possible_cpus; - - if (possible > CONFIG_NR_CPUS) { - printk(KERN_WARNING - "%d Processors exceeds NR_CPUS limit of %d\n", - possible, CONFIG_NR_CPUS); - possible = CONFIG_NR_CPUS; - } + possible = num_processors + disabled_cpus; + if (possible > NR_CPUS) + possible = NR_CPUS; printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); @@ -1363,7 +1352,7 @@ void cpu_disable_common(void) lock_vector_lock(); remove_cpu_from_maps(cpu); unlock_vector_lock(); - fixup_irqs(); + fixup_irqs(cpu_online_map); } int native_cpu_disable(void) diff --git a/trunk/arch/x86/kernel/tlb_32.c b/trunk/arch/x86/kernel/tlb_32.c index ce5054642247..8da059f949be 100644 --- a/trunk/arch/x86/kernel/tlb_32.c +++ b/trunk/arch/x86/kernel/tlb_32.c @@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ diff --git a/trunk/arch/x86/kernel/tlb_64.c b/trunk/arch/x86/kernel/tlb_64.c index f8be6f1d2e48..29887d7081a9 100644 --- a/trunk/arch/x86/kernel/tlb_64.c +++ b/trunk/arch/x86/kernel/tlb_64.c @@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpus_empty(f->flush_cpumask)) cpu_relax(); diff --git a/trunk/arch/x86/kernel/tlb_uv.c b/trunk/arch/x86/kernel/tlb_uv.c index f885023167e0..6a00e5faaa74 100644 --- a/trunk/arch/x86/kernel/tlb_uv.c +++ b/trunk/arch/x86/kernel/tlb_uv.c @@ -582,6 +582,7 @@ static int __init uv_ptc_init(void) static struct bau_control * __init uv_table_bases_init(int blade, int node) { int i; + int *ip; struct bau_msg_status *msp; struct bau_control *bau_tabp; @@ -598,6 +599,13 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node) bau_cpubits_clear(&msp->seen_by, (int) uv_blade_nr_possible_cpus(blade)); + bau_tabp->watching = + kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); + BUG_ON(!bau_tabp->watching); + + for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) + *ip = 0; + uv_bau_table_bases[blade] = bau_tabp; return bau_tabp; @@ -620,6 +628,7 @@ uv_table_bases_finish(int blade, int node, int cur_cpu, bcp->bau_msg_head = bau_tablesp->va_queue_first; bcp->va_queue_first = bau_tablesp->va_queue_first; bcp->va_queue_last = bau_tablesp->va_queue_last; + bcp->watching = bau_tablesp->watching; bcp->msg_statuses = bau_tablesp->msg_statuses; bcp->descriptor_base = adp; } diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index ce6650eb64e9..141907ab6e22 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -72,6 +72,9 @@ #include "cpu/mcheck/mce.h" +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? 
*/ @@ -86,9 +89,6 @@ gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; #endif -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - static int ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) @@ -292,10 +292,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) tsk->thread.error_code = error_code; tsk->thread.trap_no = 8; - /* - * This is always a kernel trap and never fixable (and thus must - * never return). - */ + /* This is always a kernel trap and never fixable (and thus must + never return). */ for (;;) die(str, regs, error_code); } @@ -522,11 +520,9 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) } #ifdef CONFIG_X86_64 -/* - * Help handler running on IST stack to switch back to user stack - * for scheduling or signal handling. The actual stack switch is done in - * entry.S - */ +/* Help handler running on IST stack to switch back to user stack + for scheduling or signal handling. The actual stack switch is done in + entry.S */ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; @@ -536,10 +532,8 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) /* Exception from user space */ else if (user_mode(eregs)) regs = task_pt_regs(current); - /* - * Exception from kernel and interrupts are enabled. Move to - * kernel process stack. - */ + /* Exception from kernel and interrupts are enabled. Move to + kernel process stack. */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -691,7 +685,12 @@ void math_error(void __user *ip) cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - err = swd & ~cwd; + err = swd & ~cwd & 0x3f; + +#ifdef CONFIG_X86_32 + if (!err) + return; +#endif if (err & 0x001) { /* Invalid op */ /* @@ -709,11 +708,7 @@ void math_error(void __user *ip) } else if (err & 0x020) { /* Precision */ info.si_code = FPE_FLTRES; } else { - /* - * If we're using IRQ 13, or supposedly even some trap 16 - * implementations, it's possible we get a spurious trap... - */ - return; /* Spurious trap, no error */ + info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ } force_sig_info(SIGFPE, &info, task); } @@ -946,7 +941,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { +#ifdef CONFIG_X86_32 int i; +#endif #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); @@ -1003,15 +1000,11 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); -#endif /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); -#ifdef CONFIG_X86_64 - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#else set_bit(SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/trunk/arch/x86/kernel/vmiclock_32.c b/trunk/arch/x86/kernel/vmiclock_32.c index c4c1f9e09402..254ee07f8635 100644 --- a/trunk/arch/x86/kernel/vmiclock_32.c +++ b/trunk/arch/x86/kernel/vmiclock_32.c @@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) /* Upper bound is clockevent's use of ulong for cycle deltas. */ evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); evt->min_delta_ns = clockevent_delta2ns(1, evt); - evt->cpumask = cpumask_of(cpu); + evt->cpumask = cpumask_of_cpu(cpu); printk(KERN_WARNING "vmi: registering clock event %s. 
mult=%lu shift=%u\n", evt->name, evt->mult, evt->shift); diff --git a/trunk/arch/x86/kernel/xsave.c b/trunk/arch/x86/kernel/xsave.c index 2b54fe002e94..15c3e6999182 100644 --- a/trunk/arch/x86/kernel/xsave.c +++ b/trunk/arch/x86/kernel/xsave.c @@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf) * Restore the extended state if present. Otherwise, restore the FP/SSE * state. */ -static int restore_user_xstate(void __user *buf) +int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; u64 mask; diff --git a/trunk/arch/x86/kvm/i8254.c b/trunk/arch/x86/kvm/i8254.c index e665d1c623ca..59ebd37ad79e 100644 --- a/trunk/arch/x86/kvm/i8254.c +++ b/trunk/arch/x86/kvm/i8254.c @@ -603,29 +603,10 @@ void kvm_free_pit(struct kvm *kvm) static void __inject_pit_timer_intr(struct kvm *kvm) { - struct kvm_vcpu *vcpu; - int i; - mutex_lock(&kvm->lock); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); mutex_unlock(&kvm->lock); - - /* - * Provides NMI watchdog support via Virtual Wire mode. - * The route is: PIT -> PIC -> LVT0 in NMI mode. - * - * Note: Our Virtual Wire implementation is simplified, only - * propagating PIT interrupts to all VCPUs when they have set - * LVT0 to NMI delivery. Other PIC interrupts are just sent to - * VCPU0, and only if its LVT0 is in EXTINT mode. - */ - if (kvm->arch.vapics_in_nmi_mode > 0) - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (vcpu) - kvm_apic_nmi_wd_deliver(vcpu); - } } void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) diff --git a/trunk/arch/x86/kvm/i8259.c b/trunk/arch/x86/kvm/i8259.c index 179dcb0103fd..17e41e165f1a 100644 --- a/trunk/arch/x86/kvm/i8259.c +++ b/trunk/arch/x86/kvm/i8259.c @@ -26,40 +26,10 @@ * Port from Qemu. 
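The __inject_pit_timer_intr() hunk above drops the virtual-wire NMI watchdog fan-out described in the removed comment (PIT -> PIC -> LVT0 in NMI mode): while any vAPIC has LVT0 programmed for NMI delivery, each PIT tick is offered to every vcpu as an NMI. A compile-only sketch of that loop; the vcpu table and deliver_nmi() hook are hypothetical stand-ins:

    #define MAX_VCPUS 16

    struct vcpu;                               /* opaque here */
    extern struct vcpu *vcpus[MAX_VCPUS];      /* hypothetical table */
    extern void deliver_nmi(struct vcpu *v);   /* hypothetical hook */

    static void pit_tick_nmi_fanout(int vapics_in_nmi_mode)
    {
        if (vapics_in_nmi_mode <= 0)
            return;                 /* nobody uses the NMI watchdog */
        for (int i = 0; i < MAX_VCPUS; i++)
            if (vcpus[i])
                deliver_nmi(vcpus[i]);
    }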
*/ #include -#include #include "irq.h" #include -static void pic_lock(struct kvm_pic *s) -{ - spin_lock(&s->lock); -} - -static void pic_unlock(struct kvm_pic *s) -{ - struct kvm *kvm = s->kvm; - unsigned acks = s->pending_acks; - bool wakeup = s->wakeup_needed; - struct kvm_vcpu *vcpu; - - s->pending_acks = 0; - s->wakeup_needed = false; - - spin_unlock(&s->lock); - - while (acks) { - kvm_notify_acked_irq(kvm, __ffs(acks)); - acks &= acks - 1; - } - - if (wakeup) { - vcpu = s->kvm->vcpus[0]; - if (vcpu) - kvm_vcpu_kick(vcpu); - } -} - static void pic_clear_isr(struct kvm_kpic_state *s, int irq) { s->isr &= ~(1 << irq); @@ -166,21 +136,17 @@ static void pic_update_irq(struct kvm_pic *s) void kvm_pic_update_irq(struct kvm_pic *s) { - pic_lock(s); pic_update_irq(s); - pic_unlock(s); } void kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; - pic_lock(s); if (irq >= 0 && irq < PIC_NUM_PINS) { pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); } - pic_unlock(s); } /* @@ -206,7 +172,6 @@ int kvm_pic_read_irq(struct kvm *kvm) int irq, irq2, intno; struct kvm_pic *s = pic_irqchip(kvm); - pic_lock(s); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { pic_intack(&s->pics[0], irq); @@ -231,7 +196,6 @@ int kvm_pic_read_irq(struct kvm *kvm) intno = s->pics[0].irq_base + irq; } pic_update_irq(s); - pic_unlock(s); kvm_notify_acked_irq(kvm, irq); return intno; @@ -239,7 +203,7 @@ int kvm_pic_read_irq(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s) { - int irq, irqbase, n; + int irq, irqbase; struct kvm *kvm = s->pics_state->irq_request_opaque; struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; @@ -250,10 +214,8 @@ void kvm_pic_reset(struct kvm_kpic_state *s) for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) - if (s->irr & (1 << irq) || s->isr & (1 << irq)) { - n = irq + irqbase; - s->pics_state->pending_acks |= 1 << n; - } + if (s->irr & (1 << irq) || s->isr & (1 << irq)) + kvm_notify_acked_irq(kvm, irq+irqbase); } s->last_irr = 0; s->irr = 0; @@ -444,7 +406,6 @@ static void picdev_write(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte write\n"); return; } - pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -457,7 +418,6 @@ static void picdev_write(struct kvm_io_device *this, elcr_ioport_write(&s->pics[addr & 1], addr, data); break; } - pic_unlock(s); } static void picdev_read(struct kvm_io_device *this, @@ -471,7 +431,6 @@ static void picdev_read(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte read\n"); return; } - pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -485,7 +444,6 @@ static void picdev_read(struct kvm_io_device *this, break; } *(unsigned char *)val = data; - pic_unlock(s); } /* @@ -501,7 +459,7 @@ static void pic_irq_request(void *opaque, int level) s->output = level; if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { s->pics[0].isr_ack &= ~(1 << irq); - s->wakeup_needed = true; + kvm_vcpu_kick(vcpu); } } @@ -511,8 +469,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); if (!s) return NULL; - spin_lock_init(&s->lock); - s->kvm = kvm; s->pics[0].elcr_mask = 0xf8; s->pics[1].elcr_mask = 0xde; s->irq_request = pic_irq_request; diff --git a/trunk/arch/x86/kvm/irq.h b/trunk/arch/x86/kvm/irq.h index 2bf32a03ceec..f17c8f5bbf31 100644 --- a/trunk/arch/x86/kvm/irq.h +++ b/trunk/arch/x86/kvm/irq.h @@ -25,7 +25,6 @@ #include #include #include -#include #include "iodev.h" #include "ioapic.h" @@ -60,10 +59,6 @@ struct 
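The removed pic_lock()/pic_unlock() pair above follows a classic deferral pattern: side effects that might re-enter the PIC (irq-ack notification, vcpu wakeup) are only recorded under the spinlock and carried out after it drops. A pthread rendering of the same shape, with printf() standing in for kvm_notify_acked_irq() and kvm_vcpu_kick():

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct pic {
        pthread_mutex_t lock;
        unsigned pending_acks;   /* bit n set: irq n needs an ack */
        bool wakeup_needed;
    };

    static void pic_unlock(struct pic *s)
    {
        unsigned acks = s->pending_acks;
        bool wakeup = s->wakeup_needed;

        s->pending_acks = 0;
        s->wakeup_needed = false;
        pthread_mutex_unlock(&s->lock);  /* now safe to call out */

        while (acks) {
            printf("ack irq %d\n", __builtin_ctz(acks));
            acks &= acks - 1;            /* clear lowest set bit */
        }
        if (wakeup)
            printf("kick vcpu0\n");
    }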
kvm_kpic_state { }; struct kvm_pic { - spinlock_t lock; - bool wakeup_needed; - unsigned pending_acks; - struct kvm *kvm; struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ irq_request_func *irq_request; void *irq_request_opaque; @@ -92,7 +87,6 @@ void kvm_pic_reset(struct kvm_kpic_state *s); void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); -void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); diff --git a/trunk/arch/x86/kvm/kvm_svm.h b/trunk/arch/x86/kvm/kvm_svm.h index 8e5ee99551f6..65ef0fc2c036 100644 --- a/trunk/arch/x86/kvm/kvm_svm.h +++ b/trunk/arch/x86/kvm/kvm_svm.h @@ -7,7 +7,7 @@ #include #include -#include +#include "svm.h" static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 diff --git a/trunk/arch/x86/kvm/lapic.c b/trunk/arch/x86/kvm/lapic.c index afac68c0815c..0fc3cab48943 100644 --- a/trunk/arch/x86/kvm/lapic.c +++ b/trunk/arch/x86/kvm/lapic.c @@ -130,11 +130,6 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic) return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; } -static inline int apic_lvt_nmi_mode(u32 lvt_val) -{ - return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; -} - static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ @@ -359,7 +354,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_NMI: kvm_inject_nmi(vcpu); - kvm_vcpu_kick(vcpu); break; case APIC_DM_INIT: @@ -386,14 +380,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } break; - case APIC_DM_EXTINT: - /* - * Should only be called by kvm_apic_local_deliver() with LVT0, - * before NMI watchdog was enabled. Already handled by - * kvm_apic_accept_pic_intr(). 
- */ - break; - default: printk(KERN_ERR "TODO: unsupported delivery mode %x\n", delivery_mode); @@ -677,20 +663,6 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->timer.period))); } -static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) -{ - int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); - - if (apic_lvt_nmi_mode(lvt0_val)) { - if (!nmi_wd_enabled) { - apic_debug("Receive NMI setting on APIC_LVT0 " - "for cpu %d\n", apic->vcpu->vcpu_id); - apic->vcpu->kvm->arch.vapics_in_nmi_mode++; - } - } else if (nmi_wd_enabled) - apic->vcpu->kvm->arch.vapics_in_nmi_mode--; -} - static void apic_mmio_write(struct kvm_io_device *this, gpa_t address, int len, const void *data) { @@ -771,11 +743,10 @@ static void apic_mmio_write(struct kvm_io_device *this, apic_set_reg(apic, APIC_ICR2, val & 0xff000000); break; - case APIC_LVT0: - apic_manage_nmi_watchdog(apic, val); case APIC_LVTT: case APIC_LVTTHMR: case APIC_LVTPC: + case APIC_LVT0: case APIC_LVT1: case APIC_LVTERR: /* TODO: Check vector */ @@ -990,26 +961,12 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) -{ - u32 reg = apic_get_reg(apic, lvt_type); - int vector, mode, trig_mode; - - if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { - vector = reg & APIC_VECTOR_MASK; - mode = reg & APIC_MODE_MASK; - trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode); - } - return 0; -} - -void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) +static int __inject_apic_timer_irq(struct kvm_lapic *apic) { - struct kvm_lapic *apic = vcpu->arch.apic; + int vector; - if (apic) - kvm_apic_local_deliver(apic, APIC_LVT0); + vector = apic_lvt_vector(apic, APIC_LVTT); + return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); } static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) @@ -1104,8 +1061,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic && atomic_read(&apic->timer.pending) > 0) { - if (kvm_apic_local_deliver(apic, APIC_LVTT)) + if (apic && apic_lvt_enabled(apic, APIC_LVTT) && + atomic_read(&apic->timer.pending) > 0) { + if (__inject_apic_timer_irq(apic)) atomic_dec(&apic->timer.pending); } } diff --git a/trunk/arch/x86/kvm/mmu.c b/trunk/arch/x86/kvm/mmu.c index 83f11c7474a1..410ddbc1aa2e 100644 --- a/trunk/arch/x86/kvm/mmu.c +++ b/trunk/arch/x86/kvm/mmu.c @@ -17,6 +17,7 @@ * */ +#include "vmx.h" #include "mmu.h" #include @@ -32,7 +33,6 @@ #include #include #include -#include /* * When setting this variable to true it enables Two-Dimensional-Paging @@ -168,7 +168,6 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; -static u64 __read_mostly shadow_mt_mask; void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -184,14 +183,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte) EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; - shadow_mt_mask = mt_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -386,9 +384,7 @@ static void 
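The apic_manage_nmi_watchdog() logic removed above keys off a single predicate: an LVT entry drives the NMI watchdog iff it is unmasked and set to NMI delivery. A self-contained version of that test; the constants mirror the Linux APIC register layout (mask bit 16, delivery-mode field 0x700, NMI mode 0x400) and should be checked against apicdef.h:

    #include <stdint.h>
    #include <stdbool.h>

    #define LVT_MASKED    (1u << 16)
    #define LVT_MODE_MASK 0x700u   /* delivery mode field */
    #define LVT_DM_NMI    0x400u   /* NMI delivery mode */

    static bool lvt_nmi_mode(uint32_t lvt)
    {
        /* Unmasked and mode == NMI, exactly apic_lvt_nmi_mode(). */
        return (lvt & (LVT_MODE_MASK | LVT_MASKED)) == LVT_DM_NMI;
    }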
account_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - gfn = unalias_gfn(kvm, gfn); - write_count = slot_largepage_idx(gfn, - gfn_to_memslot_unaliased(kvm, gfn)); + write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); *write_count += 1; } @@ -396,20 +392,16 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - gfn = unalias_gfn(kvm, gfn); - write_count = slot_largepage_idx(gfn, - gfn_to_memslot_unaliased(kvm, gfn)); + write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); *write_count -= 1; WARN_ON(*write_count < 0); } static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) { - struct kvm_memory_slot *slot; + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); int *largepage_idx; - gfn = unalias_gfn(kvm, gfn); - slot = gfn_to_memslot_unaliased(kvm, gfn); if (slot) { largepage_idx = slot_largepage_idx(gfn, slot); return *largepage_idx; @@ -621,7 +613,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) return NULL; } -static int rmap_write_protect(struct kvm *kvm, u64 gfn) +static void rmap_write_protect(struct kvm *kvm, u64 gfn) { unsigned long *rmapp; u64 *spte; @@ -667,7 +659,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) spte = rmap_next(kvm, rmapp, spte); } - return write_protected; + if (write_protected) + kvm_flush_remote_tlbs(kvm); } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) @@ -793,11 +786,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&sp->oos_link); ASSERT(is_empty_shadow_page(sp->spt)); - bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + sp->slot_bitmap = 0; sp->multimapped = 0; - sp->global = 1; sp->parent_pte = parent_pte; --vcpu->kvm->arch.n_free_mmu_pages; return sp; @@ -909,9 +900,8 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte) struct kvm_mmu_page *sp = page_header(__pa(spte)); index = spte - sp->spt; - if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) - sp->unsync_children++; - WARN_ON(!sp->unsync_children); + __set_bit(index, sp->unsync_child_bitmap); + sp->unsync_children = 1; } static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) @@ -938,6 +928,7 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { + sp->unsync_children = 1; kvm_mmu_update_parents_unsync(sp); return 1; } @@ -968,66 +959,38 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { } -#define KVM_PAGE_ARRAY_NR 16 - -struct kvm_mmu_pages { - struct mmu_page_and_offset { - struct kvm_mmu_page *sp; - unsigned int idx; - } page[KVM_PAGE_ARRAY_NR]; - unsigned int nr; -}; - #define for_each_unsync_children(bitmap, idx) \ for (idx = find_first_bit(bitmap, 512); \ idx < 512; \ idx = find_next_bit(bitmap, 512, idx+1)) -int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, - int idx) +static int mmu_unsync_walk(struct kvm_mmu_page *sp, + struct kvm_unsync_walk *walker) { - int i; - - if (sp->unsync) - for (i=0; i < pvec->nr; i++) - if (pvec->page[i].sp == sp) - return 0; + int i, ret; - pvec->page[pvec->nr].sp = sp; - pvec->page[pvec->nr].idx = idx; - pvec->nr++; - return (pvec->nr == KVM_PAGE_ARRAY_NR); -} - -static int __mmu_unsync_walk(struct kvm_mmu_page *sp, - struct 
kvm_mmu_pages *pvec) -{ - int i, ret, nr_unsync_leaf = 0; + if (!sp->unsync_children) + return 0; for_each_unsync_children(sp->unsync_child_bitmap, i) { u64 ent = sp->spt[i]; - if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { + if (is_shadow_present_pte(ent)) { struct kvm_mmu_page *child; child = page_header(ent & PT64_BASE_ADDR_MASK); if (child->unsync_children) { - if (mmu_pages_add(pvec, child, i)) - return -ENOSPC; - - ret = __mmu_unsync_walk(child, pvec); - if (!ret) - __clear_bit(i, sp->unsync_child_bitmap); - else if (ret > 0) - nr_unsync_leaf += ret; - else + ret = mmu_unsync_walk(child, walker); + if (ret) return ret; + __clear_bit(i, sp->unsync_child_bitmap); } if (child->unsync) { - nr_unsync_leaf++; - if (mmu_pages_add(pvec, child, i)) - return -ENOSPC; + ret = walker->entry(child, walker); + __clear_bit(i, sp->unsync_child_bitmap); + if (ret) + return ret; } } } @@ -1035,17 +998,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) sp->unsync_children = 0; - return nr_unsync_leaf; -} - -static int mmu_unsync_walk(struct kvm_mmu_page *sp, - struct kvm_mmu_pages *pvec) -{ - if (!sp->unsync_children) - return 0; - - mmu_pages_add(pvec, sp, 0); - return __mmu_unsync_walk(sp, pvec); + return 0; } static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) @@ -1068,18 +1021,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) return NULL; } -static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - list_del(&sp->oos_link); - --kvm->stat.mmu_unsync_global; -} - static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON(!sp->unsync); sp->unsync = 0; - if (sp->global) - kvm_unlink_unsync_global(kvm, sp); --kvm->stat.mmu_unsync; } @@ -1092,8 +1037,7 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 1; } - if (rmap_write_protect(vcpu->kvm, sp->gfn)) - kvm_flush_remote_tlbs(vcpu->kvm); + rmap_write_protect(vcpu->kvm, sp->gfn); kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); @@ -1104,89 +1048,30 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 0; } -struct mmu_page_path { - struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; - unsigned int idx[PT64_ROOT_LEVEL-1]; +struct sync_walker { + struct kvm_vcpu *vcpu; + struct kvm_unsync_walk walker; }; -#define for_each_sp(pvec, sp, parents, i) \ - for (i = mmu_pages_next(&pvec, &parents, -1), \ - sp = pvec.page[i].sp; \ - i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ - i = mmu_pages_next(&pvec, &parents, i)) - -int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, - int i) +static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) { - int n; + struct sync_walker *sync_walk = container_of(walk, struct sync_walker, + walker); + struct kvm_vcpu *vcpu = sync_walk->vcpu; - for (n = i+1; n < pvec->nr; n++) { - struct kvm_mmu_page *sp = pvec->page[n].sp; - - if (sp->role.level == PT_PAGE_TABLE_LEVEL) { - parents->idx[0] = pvec->page[n].idx; - return n; - } - - parents->parent[sp->role.level-2] = sp; - parents->idx[sp->role.level-1] = pvec->page[n].idx; - } - - return n; -} - -void mmu_pages_clear_parents(struct mmu_page_path *parents) -{ - struct kvm_mmu_page *sp; - unsigned int level = 0; - - do { - unsigned int idx = parents->idx[level]; - - sp = parents->parent[level]; - if (!sp) - return; - - 
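The kvm_mmu_pages/mmu_pages_add() machinery being removed above is a bounded-batch accumulator: the walker fills a fixed array and signals the caller to drain it when full, so the walk itself never allocates. The shape reduced to its core (KVM's array also records a per-entry index, omitted here):

    #define BATCH_NR 16

    struct page_batch {
        void *page[BATCH_NR];
        int nr;
    };

    /* Returns nonzero when the batch is full and must be drained. */
    static int batch_add(struct page_batch *b, void *p)
    {
        b->page[b->nr++] = p;
        return b->nr == BATCH_NR;
    }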
--sp->unsync_children; - WARN_ON((int)sp->unsync_children < 0); - __clear_bit(idx, sp->unsync_child_bitmap); - level++; - } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); -} - -static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, - struct mmu_page_path *parents, - struct kvm_mmu_pages *pvec) -{ - parents->parent[parent->role.level-1] = NULL; - pvec->nr = 0; + kvm_sync_page(vcpu, sp); + return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); } -static void mmu_sync_children(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *parent) +static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - int i; - struct kvm_mmu_page *sp; - struct mmu_page_path parents; - struct kvm_mmu_pages pages; - - kvm_mmu_pages_init(parent, &parents, &pages); - while (mmu_unsync_walk(parent, &pages)) { - int protected = 0; - - for_each_sp(pages, sp, parents, i) - protected |= rmap_write_protect(vcpu->kvm, sp->gfn); - - if (protected) - kvm_flush_remote_tlbs(vcpu->kvm); + struct sync_walker walker = { + .walker = { .entry = mmu_sync_fn, }, + .vcpu = vcpu, + }; - for_each_sp(pages, sp, parents, i) { - kvm_sync_page(vcpu, sp); - mmu_pages_clear_parents(&parents); - } + while (mmu_unsync_walk(sp, &walker.walker)) cond_resched_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_pages_init(parent, &parents, &pages); - } } static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, @@ -1244,8 +1129,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, sp->role = role; hlist_add_head(&sp->hash_link, bucket); if (!metaphysical) { - if (rmap_write_protect(vcpu->kvm, gfn)) - kvm_flush_remote_tlbs(vcpu->kvm); + rmap_write_protect(vcpu->kvm, gfn); account_shadowed(vcpu->kvm, gfn); } if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) @@ -1269,8 +1153,6 @@ static int walk_shadow(struct kvm_shadow_walk *walker, if (level == PT32E_ROOT_LEVEL) { shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; shadow_addr &= PT64_BASE_ADDR_MASK; - if (!shadow_addr) - return 1; --level; } @@ -1355,29 +1237,33 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) } } -static int mmu_zap_unsync_children(struct kvm *kvm, - struct kvm_mmu_page *parent) -{ - int i, zapped = 0; - struct mmu_page_path parents; - struct kvm_mmu_pages pages; - - if (parent->role.level == PT_PAGE_TABLE_LEVEL) - return 0; +struct zap_walker { + struct kvm_unsync_walk walker; + struct kvm *kvm; + int zapped; +}; - kvm_mmu_pages_init(parent, &parents, &pages); - while (mmu_unsync_walk(parent, &pages)) { - struct kvm_mmu_page *sp; +static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) +{ + struct zap_walker *zap_walk = container_of(walk, struct zap_walker, + walker); + kvm_mmu_zap_page(zap_walk->kvm, sp); + zap_walk->zapped = 1; + return 0; +} - for_each_sp(pages, sp, parents, i) { - kvm_mmu_zap_page(kvm, sp); - mmu_pages_clear_parents(&parents); - } - zapped += pages.nr; - kvm_mmu_pages_init(parent, &parents, &pages); - } +static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + struct zap_walker walker = { + .walker = { .entry = mmu_zap_fn, }, + .kvm = kvm, + .zapped = 0, + }; - return zapped; + if (sp->role.level == PT_PAGE_TABLE_LEVEL) + return 0; + mmu_unsync_walk(sp, &walker.walker); + return walker.zapped; } static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -1476,7 +1362,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) int slot = memslot_id(kvm, gfn_to_memslot(kvm, 
gfn)); struct kvm_mmu_page *sp = page_header(__pa(pte)); - __set_bit(slot, sp->slot_bitmap); + __set_bit(slot, &sp->slot_bitmap); } static void mmu_convert_notrap(struct kvm_mmu_page *sp) @@ -1507,110 +1393,6 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) return page; } -/* - * The function is based on mtrr_type_lookup() in - * arch/x86/kernel/cpu/mtrr/generic.c - */ -static int get_mtrr_type(struct mtrr_state_type *mtrr_state, - u64 start, u64 end) -{ - int i; - u64 base, mask; - u8 prev_match, curr_match; - int num_var_ranges = KVM_NR_VAR_MTRR; - - if (!mtrr_state->enabled) - return 0xFF; - - /* Make end inclusive end, instead of exclusive */ - end--; - - /* Look in fixed ranges. Just return the type as per start */ - if (mtrr_state->have_fixed && (start < 0x100000)) { - int idx; - - if (start < 0x80000) { - idx = 0; - idx += (start >> 16); - return mtrr_state->fixed_ranges[idx]; - } else if (start < 0xC0000) { - idx = 1 * 8; - idx += ((start - 0x80000) >> 14); - return mtrr_state->fixed_ranges[idx]; - } else if (start < 0x1000000) { - idx = 3 * 8; - idx += ((start - 0xC0000) >> 12); - return mtrr_state->fixed_ranges[idx]; - } - } - - /* - * Look in variable ranges - * Look of multiple ranges matching this address and pick type - * as per MTRR precedence - */ - if (!(mtrr_state->enabled & 2)) - return mtrr_state->def_type; - - prev_match = 0xFF; - for (i = 0; i < num_var_ranges; ++i) { - unsigned short start_state, end_state; - - if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) - continue; - - base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + - (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); - mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + - (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); - - start_state = ((start & mask) == (base & mask)); - end_state = ((end & mask) == (base & mask)); - if (start_state != end_state) - return 0xFE; - - if ((start & mask) != (base & mask)) - continue; - - curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; - if (prev_match == 0xFF) { - prev_match = curr_match; - continue; - } - - if (prev_match == MTRR_TYPE_UNCACHABLE || - curr_match == MTRR_TYPE_UNCACHABLE) - return MTRR_TYPE_UNCACHABLE; - - if ((prev_match == MTRR_TYPE_WRBACK && - curr_match == MTRR_TYPE_WRTHROUGH) || - (prev_match == MTRR_TYPE_WRTHROUGH && - curr_match == MTRR_TYPE_WRBACK)) { - prev_match = MTRR_TYPE_WRTHROUGH; - curr_match = MTRR_TYPE_WRTHROUGH; - } - - if (prev_match != curr_match) - return MTRR_TYPE_UNCACHABLE; - } - - if (prev_match != 0xFF) - return prev_match; - - return mtrr_state->def_type; -} - -static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) -{ - u8 mtrr; - - mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, - (gfn << PAGE_SHIFT) + PAGE_SIZE); - if (mtrr == 0xfe || mtrr == 0xff) - mtrr = MTRR_TYPE_WRBACK; - return mtrr; -} - static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { unsigned index; @@ -1627,15 +1409,9 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (s->role.word != sp->role.word) return 1; } + kvm_mmu_mark_parents_unsync(vcpu, sp); ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; - - if (sp->global) { - list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); - ++vcpu->kvm->stat.mmu_unsync_global; - } else - kvm_mmu_mark_parents_unsync(vcpu, sp); - mmu_convert_notrap(sp); return 0; } @@ -1661,24 +1437,11 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 
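The removed get_mtrr_type() above resolves a guest physical address against the variable-range MTRRs: a range matches iff (addr & mask) == (base & mask), and overlaps resolve by precedence (UC beats everything, a WB/WT overlap yields WT). A simplified sketch that collapses every conflicting overlap to UC, which is stricter than the real precedence rules:

    #include <stdint.h>

    #define MTRR_TYPE_UC 0  /* uncachable */

    struct var_mtrr { uint64_t base, mask; uint8_t type; };

    /* Returns 0xFF when no range matches; caller uses the default type. */
    static uint8_t mtrr_var_lookup(const struct var_mtrr *r, int n,
                                   uint64_t addr)
    {
        uint8_t match = 0xff;

        for (int i = 0; i < n; i++) {
            if ((addr & r[i].mask) != (r[i].base & r[i].mask))
                continue;                /* not inside range i */
            if (match == 0xff)
                match = r[i].type;
            else if (match != r[i].type)
                return MTRR_TYPE_UC;     /* conflicting overlap */
        }
        return match;
    }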
unsigned pte_access, int user_fault, int write_fault, int dirty, int largepage, - int global, gfn_t gfn, pfn_t pfn, bool speculative, + gfn_t gfn, pfn_t pfn, bool speculative, bool can_unsync) { u64 spte; int ret = 0; - u64 mt_mask = shadow_mt_mask; - struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); - - if (!(vcpu->arch.cr4 & X86_CR4_PGE)) - global = 0; - if (!global && sp->global) { - sp->global = 0; - if (sp->unsync) { - kvm_unlink_unsync_global(vcpu->kvm, sp); - kvm_mmu_mark_parents_unsync(vcpu, sp); - } - } - /* * We don't set the accessed bit, since we sometimes want to see * whether the guest actually used the pte (in order to detect @@ -1697,11 +1460,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; - if (mt_mask) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); - spte |= mt_mask; - } spte |= (u64)pfn << PAGE_SHIFT; @@ -1716,15 +1474,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= PT_WRITABLE_MASK; - /* - * Optimization: for pte sync, if spte was writable the hash - * lookup is unnecessary (and expensive). Write protection - * is responsibility of mmu_get_page / kvm_sync_page. - * Same reasoning can be applied to dirty page accounting. - */ - if (!can_unsync && is_writeble_pte(*shadow_pte)) - goto set_pte; - if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { pgprintk("%s: found shadow page for %lx, marking ro\n", __func__, gfn); @@ -1746,8 +1495,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, - int *ptwrite, int largepage, int global, - gfn_t gfn, pfn_t pfn, bool speculative) + int *ptwrite, int largepage, gfn_t gfn, + pfn_t pfn, bool speculative) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); @@ -1780,7 +1529,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, - dirty, largepage, global, gfn, pfn, speculative, true)) { + dirty, largepage, gfn, pfn, speculative, true)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1837,7 +1586,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk, || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 0, walk->write, 1, &walk->pt_write, - walk->largepage, 0, gfn, walk->pfn, false); + walk->largepage, gfn, walk->pfn, false); ++vcpu->stat.pf_fixed; return 1; } @@ -2024,15 +1773,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) } } -static void mmu_sync_global(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = vcpu->kvm; - struct kvm_mmu_page *sp, *n; - - list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) - kvm_sync_page(vcpu, sp); -} - void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->kvm->mmu_lock); @@ -2040,13 +1780,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->kvm->mmu_lock); } -void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) -{ - spin_lock(&vcpu->kvm->mmu_lock); - mmu_sync_global(vcpu); - spin_unlock(&vcpu->kvm->mmu_lock); -} - static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) { return vaddr; @@ -2445,8 +2178,7 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes, - bool 
guest_initiated) + const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; @@ -2472,17 +2204,15 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, "pre pte write"); - if (guest_initiated) { - if (gfn == vcpu->arch.last_pt_write_gfn - && !last_updated_pte_accessed(vcpu)) { - ++vcpu->arch.last_pt_write_count; - if (vcpu->arch.last_pt_write_count >= 3) - flooded = 1; - } else { - vcpu->arch.last_pt_write_gfn = gfn; - vcpu->arch.last_pt_write_count = 1; - vcpu->arch.last_pte_updated = NULL; - } + if (gfn == vcpu->arch.last_pt_write_gfn + && !last_updated_pte_accessed(vcpu)) { + ++vcpu->arch.last_pt_write_count; + if (vcpu->arch.last_pt_write_count >= 3) + flooded = 1; + } else { + vcpu->arch.last_pt_write_gfn = gfn; + vcpu->arch.last_pt_write_count = 1; + vcpu->arch.last_pte_updated = NULL; } index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; @@ -2622,7 +2352,9 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { + spin_lock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.invlpg(vcpu, gva); + spin_unlock(&vcpu->kvm->mmu_lock); kvm_mmu_flush_tlb(vcpu); ++vcpu->stat.invlpg; } @@ -2719,7 +2451,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) int i; u64 *pt; - if (!test_bit(slot, sp->slot_bitmap)) + if (!test_bit(slot, &sp->slot_bitmap)) continue; pt = sp->spt; @@ -3128,8 +2860,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) if (sp->role.metaphysical) continue; + slot = gfn_to_memslot(vcpu->kvm, sp->gfn); gfn = unalias_gfn(vcpu->kvm, sp->gfn); - slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); rmapp = &slot->rmap[gfn - slot->base_gfn]; if (*rmapp) printk(KERN_ERR "%s: (%s) shadow page has writable" diff --git a/trunk/arch/x86/kvm/paging_tmpl.h b/trunk/arch/x86/kvm/paging_tmpl.h index 9fd78b6e17ad..84eee43bbe74 100644 --- a/trunk/arch/x86/kvm/paging_tmpl.h +++ b/trunk/arch/x86/kvm/paging_tmpl.h @@ -82,7 +82,6 @@ struct shadow_walker { int *ptwrite; pfn_t pfn; u64 *sptep; - gpa_t pte_gpa; }; static gfn_t gpte_to_gfn(pt_element_t gpte) @@ -223,7 +222,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, if (ret) goto walk; pte |= PT_DIRTY_MASK; - kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); + kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); walker->ptes[walker->level - 1] = pte; } @@ -275,8 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, - gpte & PT_DIRTY_MASK, NULL, largepage, - gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), + gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), pfn, true); } @@ -303,9 +301,8 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, sw->user_fault, sw->write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, - sw->ptwrite, sw->largepage, - gw->ptes[gw->level-1] & PT_GLOBAL_MASK, - gw->gfn, sw->pfn, false); + sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, + false); sw->sptep = sptep; return 1; } @@ -469,22 +466,10 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, struct kvm_vcpu *vcpu, u64 addr, u64 *sptep, int level) { - struct shadow_walker *sw = - container_of(_sw, struct shadow_walker, walker); - /* FIXME: properly handle invlpg on large guest pages */ - if (level == PT_PAGE_TABLE_LEVEL || - 
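The kvm_mmu_pte_write() hunk above keeps the write-flood heuristic but no longer gates it on guest-initiated writes: three consecutive writes to the same guest page-table frame (with the last updated PTE never accessed, a check omitted below) mark the page as flooded and a candidate for zapping. The counter logic in isolation:

    #include <stdbool.h>
    #include <stdint.h>

    struct flood_state { uint64_t last_gfn; int count; };

    static bool pt_write_flooded(struct flood_state *s, uint64_t gfn)
    {
        if (gfn == s->last_gfn)
            return ++s->count >= 3;  /* third hit in a row: flooded */
        s->last_gfn = gfn;           /* new target, restart count */
        s->count = 1;
        return false;
    }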
((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - sw->pte_gpa = (sp->gfn << PAGE_SHIFT); - sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); - - if (is_shadow_present_pte(*sptep)) { + if (level == PT_PAGE_TABLE_LEVEL) { + if (is_shadow_present_pte(*sptep)) rmap_remove(vcpu->kvm, sptep); - if (is_large_pte(*sptep)) - --vcpu->kvm->stat.lpages; - } set_shadow_pte(sptep, shadow_trap_nonpresent_pte); return 1; } @@ -495,26 +480,11 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { - pt_element_t gpte; struct shadow_walker walker = { .walker = { .entry = FNAME(shadow_invlpg_entry), }, - .pte_gpa = -1, }; - spin_lock(&vcpu->kvm->mmu_lock); walk_shadow(&walker.walker, vcpu, gva); - spin_unlock(&vcpu->kvm->mmu_lock); - if (walker.pte_gpa == -1) - return; - if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, - sizeof(pt_element_t))) - return; - if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { - if (mmu_topup_memory_caches(vcpu)) - return; - kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, - sizeof(pt_element_t), 0); - } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) @@ -610,7 +580,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, - is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, + is_dirty_pte(gpte), 0, gfn, spte_to_pfn(sp->spt[i]), true, false); } diff --git a/trunk/arch/x86/kvm/svm.c b/trunk/arch/x86/kvm/svm.c index 1452851ae258..9c4ce657d963 100644 --- a/trunk/arch/x86/kvm/svm.c +++ b/trunk/arch/x86/kvm/svm.c @@ -28,8 +28,6 @@ #include -#include - #define __ex(x) __kvm_handle_fault_on_reboot(x) MODULE_AUTHOR("Qumranet"); @@ -247,19 +245,34 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) static int has_svm(void) { - const char *msg; + uint32_t eax, ebx, ecx, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + printk(KERN_INFO "has_svm: not amd\n"); + return 0; + } - if (!cpu_has_svm(&msg)) { - printk(KERN_INFO "has_svn: %s\n", msg); + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if (eax < SVM_CPUID_FUNC) { + printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); return 0; } + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { + printk(KERN_DEBUG "has_svm: svm not available\n"); + return 0; + } return 1; } static void svm_hardware_disable(void *garbage) { - cpu_svm_disable(); + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); } static void svm_hardware_enable(void *garbage) @@ -759,22 +772,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; - - /* - * SVM always stores 0 for the 'G' bit in the CS selector in - * the VMCB on a VMEXIT. This hurts cross-vendor migration: - * Intel's VMENTRY has a check on the 'G' bit. 
- */ - if (seg == VCPU_SREG_CS) - var->g = s->limit > 0xfffff; - - /* - * Work around a bug where the busy flag in the tr selector - * isn't exposed - */ - if (seg == VCPU_SREG_TR) - var->type |= 0x2; - var->unusable = !var->present; } @@ -1102,7 +1099,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) rep = (io_info & SVM_IOIO_REP_MASK) != 0; down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; - skip_emulated_instruction(&svm->vcpu); return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); } @@ -1916,11 +1912,6 @@ static int get_npt_level(void) #endif } -static int svm_get_mt_mask_shift(void) -{ - return 0; -} - static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -1976,7 +1967,6 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, - .get_mt_mask_shift = svm_get_mt_mask_shift, }; static int __init svm_init(void) diff --git a/trunk/arch/x86/include/asm/svm.h b/trunk/arch/x86/kvm/svm.h similarity index 100% rename from trunk/arch/x86/include/asm/svm.h rename to trunk/arch/x86/kvm/svm.h diff --git a/trunk/arch/x86/kvm/vmx.c b/trunk/arch/x86/kvm/vmx.c index 6259d7467648..a4018b01e1f9 100644 --- a/trunk/arch/x86/kvm/vmx.c +++ b/trunk/arch/x86/kvm/vmx.c @@ -16,6 +16,7 @@ */ #include "irq.h" +#include "vmx.h" #include "mmu.h" #include @@ -30,8 +31,6 @@ #include #include -#include -#include #define __ex(x) __kvm_handle_fault_on_reboot(x) @@ -91,11 +90,6 @@ struct vcpu_vmx { } rmode; int vpid; bool emulation_required; - - /* Support for vnmi-less CPUs */ - int soft_vnmi_blocked; - ktime_t entry_time; - s64 vnmi_blocked_time; }; static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) @@ -128,7 +122,7 @@ static struct vmcs_config { u32 vmentry_ctrl; } vmcs_config; -static struct vmx_capability { +struct vmx_capability { u32 ept; u32 vpid; } vmx_capability; @@ -963,13 +957,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); break; - case MSR_IA32_CR_PAT: - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - vmcs_write64(GUEST_IA32_PAT, data); - vcpu->arch.pat = data; - break; - } - /* Otherwise falls through to kvm_set_msr_common */ default: vmx_load_host_state(vmx); msr = find_msr_entry(vmx, msr_index); @@ -1045,7 +1032,8 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu) static __init int cpu_has_kvm_support(void) { - return cpu_has_vmx(); + unsigned long ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ } static __init int vmx_disabled_by_bios(void) @@ -1091,20 +1079,11 @@ static void vmclear_local_vcpus(void) __vcpu_clear(vmx); } - -/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() - * tricks. 
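The open-coded has_svm() restored above probes CPUID directly instead of going through cpu_has_svm(). The same probe is easy to reproduce from user space; bit 2 of ECX in leaf 0x8000_0001 is AMD's SVM feature flag:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
            return 1;                    /* leaf not supported */
        printf("SVM %s\n", (ecx & (1u << 2)) ? "present" : "absent");
        return 0;
    }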
- */ -static void kvm_cpu_vmxoff(void) -{ - asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); - write_cr4(read_cr4() & ~X86_CR4_VMXE); -} - static void hardware_disable(void *garbage) { vmclear_local_vcpus(); - kvm_cpu_vmxoff(); + asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); + write_cr4(read_cr4() & ~X86_CR4_VMXE); } static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, @@ -1197,13 +1176,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; + opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; - min = 0; - opt = VM_ENTRY_LOAD_IA32_PAT; + min = opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, &_vmentry_control) < 0) return -EIO; @@ -2109,9 +2087,8 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) */ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { - u32 host_sysenter_cs, msr_low, msr_high; + u32 host_sysenter_cs; u32 junk; - u64 host_pat; unsigned long a; struct descriptor_table dt; int i; @@ -2199,20 +2176,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) rdmsrl(MSR_IA32_SYSENTER_EIP, a); vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ - if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { - rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); - host_pat = msr_low | ((u64) msr_high << 32); - vmcs_write64(HOST_IA32_PAT, host_pat); - } - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); - host_pat = msr_low | ((u64) msr_high << 32); - /* Write the default value follow host pat */ - vmcs_write64(GUEST_IA32_PAT, host_pat); - /* Keep arch.pat sync with GUEST_IA32_PAT */ - vmx->vcpu.arch.pat = host_pat; - } - for (i = 0; i < NR_VMX_MSR; ++i) { u32 index = vmx_msr_index[i]; u32 data_low, data_high; @@ -2267,8 +2230,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.rmode.active = 0; - vmx->soft_vnmi_blocked = 0; - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; @@ -2374,29 +2335,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) return ret; } -static void enable_irq_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - -static void enable_nmi_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - if (!cpu_has_virtual_nmis()) { - enable_irq_window(vcpu); - return; - } - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2420,54 +2358,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) static void vmx_inject_nmi(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!cpu_has_virtual_nmis()) { - /* - * Tracking the NMI-blocked state in software is built upon - * finding the next open IRQ window. This, in turn, depends on - * well-behaving guests: They have to keep IRQs disabled at - * least as long as the NMI handler runs. Otherwise we may - * cause NMI nesting, maybe breaking the guest. 
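The setup_vmcs_config() hunks above shrink the opt masks handed to adjust_vmx_controls(), whose job is to clamp a desired control word against a VMX capability MSR: the MSR's low dword holds bits the CPU forces to 1, its high dword the bits the CPU allows to be 1. A sketch of that clamp, under my reading of the allowed-0/allowed-1 convention:

    #include <stdint.h>

    /* Returns 0 and writes the clamped control word, or -1 if the
     * CPU refuses a bit listed as required in 'min'. */
    static int adjust_controls(uint32_t min, uint32_t opt,
                               uint64_t cap_msr, uint32_t *out)
    {
        uint32_t allowed0 = (uint32_t)cap_msr;         /* must-be-1 */
        uint32_t allowed1 = (uint32_t)(cap_msr >> 32); /* may-be-1 */
        uint32_t ctl = (min | opt) & allowed1;

        ctl |= allowed0;
        if ((ctl & min) != min)
            return -1;
        *out = ctl;
        return 0;
    }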
But as this is - * highly unlikely, we can live with the residual risk. - */ - vmx->soft_vnmi_blocked = 1; - vmx->vnmi_blocked_time = 0; - } - - ++vcpu->stat.nmi_injections; - if (vcpu->arch.rmode.active) { - vmx->rmode.irq.pending = true; - vmx->rmode.irq.vector = NMI_VECTOR; - vmx->rmode.irq.rip = kvm_rip_read(vcpu); - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - NMI_VECTOR | INTR_TYPE_SOFT_INTR | - INTR_INFO_VALID_MASK); - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); - kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); - return; - } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } -static void vmx_update_window_states(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - - vcpu->arch.nmi_window_open = - !(guest_intr & (GUEST_INTR_STATE_STI | - GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_NMI)); - if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) - vcpu->arch.nmi_window_open = 0; - - vcpu->arch.interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - !(guest_intr & (GUEST_INTR_STATE_STI | - GUEST_INTR_STATE_MOV_SS))); -} - static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->arch.irq_summary); @@ -2480,49 +2374,40 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) kvm_queue_interrupt(vcpu, irq); } + static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - vmx_update_window_states(vcpu); + u32 cpu_based_vm_exec_control; - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vcpu->arch.interrupt.pending) { - enable_nmi_window(vcpu); - } else if (vcpu->arch.nmi_window_open) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_nmi_window(vcpu); - return; - } - } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (vcpu->arch.irq_summary - || kvm_run->request_interrupt_window) - enable_irq_window(vcpu); - return; - } + vcpu->arch.interrupt_window_open = + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); - if (vcpu->arch.interrupt_window_open) { - if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) - kvm_do_inject_irq(vcpu); + if (vcpu->arch.interrupt_window_open && + vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) + kvm_do_inject_irq(vcpu); - if (vcpu->arch.interrupt.pending) - vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - } + if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) + vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); if (!vcpu->arch.interrupt_window_open && (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) - enable_irq_window(vcpu); + /* + * Interrupts blocked. Wait for unblock. 
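The do_interrupt_requests() rewrite above goes back to toggling CPU_BASED_VIRTUAL_INTR_PENDING by hand: with that bit set in the primary execution controls, the CPU exits as soon as the guest can accept an interrupt, which is how pending injections get retried. Modeled on a plain variable rather than a live VMCS; the bit position follows the SDM's primary processor-based controls:

    #include <stdint.h>
    #include <stdbool.h>

    #define CPU_BASED_VIRTUAL_INTR_PENDING (1u << 2)

    static void set_irq_window(uint32_t *exec_control, bool want_exit)
    {
        if (want_exit)
            *exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
        else
            *exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
    }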
+ */ + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + else + cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); } static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; struct kvm_userspace_memory_region tss_mem = { - .slot = TSS_PRIVATE_MEMSLOT, + .slot = 8, .guest_phys_addr = addr, .memory_size = PAGE_SIZE * 3, .flags = 0, @@ -2607,7 +2492,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); } - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ return 1; /* already handled by vmx_vcpu_run() */ if (is_no_device(intr_info)) { @@ -2696,7 +2581,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; - skip_emulated_instruction(vcpu); return kvm_emulate_pio(vcpu, kvm_run, in, size, port); } @@ -2883,7 +2767,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); KVMTRACE_0D(PEND_INTR, vcpu, handler); - ++vcpu->stat.irq_window_exits; /* * If the user space waits to inject interrupts, exit as soon as @@ -2892,6 +2775,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, if (kvm_run->request_interrupt_window && !vcpu->arch.irq_summary) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; + ++vcpu->stat.irq_window_exits; return 0; } return 1; @@ -2948,7 +2832,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; u16 tss_selector; int reason; @@ -2956,15 +2839,6 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); reason = (u32)exit_qualification >> 30; - if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && - (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) - == INTR_TYPE_NMI_INTR) { - vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - } tss_selector = exit_qualification; return kvm_task_switch(vcpu, tss_selector, reason); @@ -3053,12 +2927,16 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, while (!guest_state_valid(vcpu)) { err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); - if (err == EMULATE_DO_MMIO) - break; - - if (err != EMULATE_DONE) { - kvm_report_emulation_failure(vcpu, "emulation failure"); - return; + switch (err) { + case EMULATE_DONE: + break; + case EMULATE_DO_MMIO: + kvm_report_emulation_failure(vcpu, "mmio"); + /* TODO: Handle MMIO */ + return; + default: + kvm_report_emulation_failure(vcpu, "emulation failure"); + return; } if (signal_pending(current)) @@ -3070,10 +2948,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, local_irq_disable(); preempt_disable(); - /* Guest state should be valid now except if we need to - * emulate an MMIO */ - if (guest_state_valid(vcpu)) - vmx->emulation_required = 0; + /* Guest state should be valid now, no more emulation should be needed */ + vmx->emulation_required = 0; } /* @@ -3120,11 +2996,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); - /* If we need to emulate an MMIO from handle_invalid_guest_state - * we just return 0 */ - if (vmx->emulation_required && emulate_invalid_guest_state) - return 0; - /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ if (vm_need_ept() && is_paging(vcpu)) { @@ -3141,32 +3012,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if ((vectoring_info & VECTORING_INFO_VALID_MASK) && (exit_reason != EXIT_REASON_EXCEPTION_NMI && - exit_reason != EXIT_REASON_EPT_VIOLATION && - exit_reason != EXIT_REASON_TASK_SWITCH)) - printk(KERN_WARNING "%s: unexpected, valid vectoring info " - "(0x%x) and exit reason is 0x%x\n", - __func__, vectoring_info, exit_reason); - - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { - if (vcpu->arch.interrupt_window_open) { - vmx->soft_vnmi_blocked = 0; - vcpu->arch.nmi_window_open = 1; - } else if (vmx->vnmi_blocked_time > 1000000000LL && - vcpu->arch.nmi_pending) { - /* - * This CPU don't support us in finding the end of an - * NMI-blocked window if the guest runs with IRQs - * disabled. So we pull the trigger after 1 s of - * futile waiting, but inform the user about this. - */ - printk(KERN_WARNING "%s: Breaking out of NMI-blocked " - "state on VCPU %d after 1 s timeout\n", - __func__, vcpu->vcpu_id); - vmx->soft_vnmi_blocked = 0; - vmx->vcpu.arch.nmi_window_open = 1; - } - } - + exit_reason != EXIT_REASON_EPT_VIOLATION)) + printk(KERN_WARNING "%s: unexpected, valid vectoring info and " + "exit reason is 0x%x\n", __func__, exit_reason); if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); @@ -3194,6 +3042,51 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? 
tpr >> 4 : max_irr >> 4); } +static void enable_irq_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static void enable_nmi_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + if (!cpu_has_virtual_nmis()) + return; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + return !(guest_intr & (GUEST_INTR_STATE_NMI | + GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_STI)); +} + +static int vmx_irq_enabled(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_STI)) && + (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); +} + +static void enable_intr_window(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); +} + static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -3216,9 +3109,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) if (unblock_nmi && vector != DF_VECTOR) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - } else if (unlikely(vmx->soft_vnmi_blocked)) - vmx->vnmi_blocked_time += - ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); + } idt_vectoring_info = vmx->idt_vectoring_info; idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; @@ -3256,29 +3147,26 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) { update_tpr_threshold(vcpu); - vmx_update_window_states(vcpu); - - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vcpu->arch.interrupt.pending) { - enable_nmi_window(vcpu); - } else if (vcpu->arch.nmi_window_open) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_nmi_window(vcpu); + if (cpu_has_virtual_nmis()) { + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vmx_nmi_enabled(vcpu)) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + } else { + enable_intr_window(vcpu); + return; + } + } + if (vcpu->arch.nmi_injected) { + vmx_inject_nmi(vcpu); + enable_intr_window(vcpu); return; } } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); - return; - } if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { - if (vcpu->arch.interrupt_window_open) + if (vmx_irq_enabled(vcpu)) kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); else enable_irq_window(vcpu); @@ -3286,8 +3174,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); - if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); } } @@ -3327,10 +3213,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 intr_info; - /* Record the guest's net vcpu time for 
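The vmx_nmi_enabled()/vmx_irq_enabled() helpers added above read GUEST_INTERRUPTIBILITY_INFO and decide whether an injection window is open: NMIs are blocked by the STI, MOV-SS, and NMI shadows, IRQs by the STI and MOV-SS shadows plus a clear RFLAGS.IF. Pure-function equivalents, with bit values taken from the VMX interruptibility-state encoding:

    #include <stdint.h>
    #include <stdbool.h>

    #define INTR_STATE_STI    1u        /* blocked by STI */
    #define INTR_STATE_MOV_SS 2u        /* blocked by MOV SS / POP SS */
    #define INTR_STATE_NMI    8u        /* blocked by NMI */
    #define RFLAGS_IF         (1u << 9)

    static bool nmi_window_open(uint32_t intr_state)
    {
        return !(intr_state &
                 (INTR_STATE_STI | INTR_STATE_MOV_SS | INTR_STATE_NMI));
    }

    static bool irq_window_open(uint32_t intr_state, uint64_t rflags)
    {
        return (rflags & RFLAGS_IF) &&
               !(intr_state & (INTR_STATE_STI | INTR_STATE_MOV_SS));
    }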
enforced NMI injections. */ - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) - vmx->entry_time = ktime_get(); - /* Handle invalid guest state instead of entering VMX */ if (vmx->emulation_required && emulate_invalid_guest_state) { handle_invalid_guest_state(vcpu, kvm_run); @@ -3445,7 +3327,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); - vmx_update_window_states(vcpu); + vcpu->arch.interrupt_window_open = + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; @@ -3453,7 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && (intr_info & INTR_INFO_VALID_MASK)) { KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); @@ -3571,11 +3455,6 @@ static int get_ept_level(void) return VMX_EPT_DEFAULT_GAW + 1; } -static int vmx_get_mt_mask_shift(void) -{ - return VMX_EPT_MT_EPTE_SHIFT; -} - static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -3631,7 +3510,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, - .get_mt_mask_shift = vmx_get_mt_mask_shift, }; static int __init vmx_init(void) @@ -3688,10 +3566,10 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, - VMX_EPT_EXECUTABLE_MASK, - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); } else kvm_disable_tdp(); diff --git a/trunk/arch/x86/include/asm/vmx.h b/trunk/arch/x86/kvm/vmx.h similarity index 93% rename from trunk/arch/x86/include/asm/vmx.h rename to trunk/arch/x86/kvm/vmx.h index d0238e6151d8..ec5edc339da6 100644 --- a/trunk/arch/x86/include/asm/vmx.h +++ b/trunk/arch/x86/kvm/vmx.h @@ -63,13 +63,10 @@ #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 -#define VM_EXIT_SAVE_IA32_PAT 0x00040000 -#define VM_EXIT_LOAD_IA32_PAT 0x00080000 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 -#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 /* VMCS Encodings */ enum vmcs_field { @@ -115,8 +112,6 @@ enum vmcs_field { VMCS_LINK_POINTER_HIGH = 0x00002801, GUEST_IA32_DEBUGCTL = 0x00002802, GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, - GUEST_IA32_PAT = 0x00002804, - GUEST_IA32_PAT_HIGH = 0x00002805, GUEST_PDPTR0 = 0x0000280a, GUEST_PDPTR0_HIGH = 0x0000280b, GUEST_PDPTR1 = 0x0000280c, @@ -125,8 +120,6 @@ enum vmcs_field { GUEST_PDPTR2_HIGH = 0x0000280f, GUEST_PDPTR3 = 0x00002810, GUEST_PDPTR3_HIGH = 0x00002811, - HOST_IA32_PAT = 0x00002c00, - HOST_IA32_PAT_HIGH = 0x00002c01, PIN_BASED_VM_EXEC_CONTROL = 0x00004000, CPU_BASED_VM_EXEC_CONTROL = 0x00004002, EXCEPTION_BITMAP = 0x00004004, @@ -338,9 +331,8 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) 
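The vmx.h memslot hunk at this point swaps the symbolic private-slot macros for the literals 9 and 10, matching the hard-coded .slot = 8 in vmx_set_tss_addr() earlier in the patch. The two forms agree only while KVM_MEMORY_SLOTS is 8, which appears to be the value of this era and is assumed below:

    #include <assert.h>

    #define KVM_MEMORY_SLOTS 8  /* assumption; check kvm_host.h */

    static_assert(KVM_MEMORY_SLOTS + 0 == 8,  "TSS slot");
    static_assert(KVM_MEMORY_SLOTS + 1 == 9,  "APIC access page slot");
    static_assert(KVM_MEMORY_SLOTS + 2 == 10, "identity pagetable slot");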
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 @@ -364,19 +356,4 @@ enum vmcs_field { #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul - -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" - - - #endif diff --git a/trunk/arch/x86/kvm/x86.c b/trunk/arch/x86/kvm/x86.c index 0e6aa8141dcd..f1f8ff2f1fa2 100644 --- a/trunk/arch/x86/kvm/x86.c +++ b/trunk/arch/x86/kvm/x86.c @@ -39,7 +39,6 @@ #include #include #include -#include #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -87,7 +86,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, - { "request_nmi", VCPU_STAT(request_nmi_exits) }, { "irq_exits", VCPU_STAT(irq_exits) }, { "host_state_reload", VCPU_STAT(host_state_reload) }, { "efer_reload", VCPU_STAT(efer_reload) }, @@ -95,7 +93,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "insn_emulation", VCPU_STAT(insn_emulation) }, { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, { "irq_injections", VCPU_STAT(irq_injections) }, - { "nmi_injections", VCPU_STAT(nmi_injections) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, @@ -104,7 +101,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_recycled", VM_STAT(mmu_recycled) }, { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, - { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages) }, { NULL } @@ -316,7 +312,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops->set_cr0(vcpu, cr0); vcpu->arch.cr0 = cr0; - kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); return; } @@ -360,7 +355,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; - kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } EXPORT_SYMBOL_GPL(kvm_set_cr4); @@ -455,7 +449,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT + MSR_IA32_PERF_STATUS, }; static unsigned num_msrs_to_save; @@ -654,38 +648,10 @@ static bool msr_mtrr_valid(unsigned msr) static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { - u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!msr_mtrr_valid(msr)) return 1; - if (msr == MSR_MTRRdefType) { - vcpu->arch.mtrr_state.def_type = data; - vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; - } else if (msr == MSR_MTRRfix64K_00000) - p[0] = data; - else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) - p[1 + msr - 
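[Aside: the ASM_VMX_* macros dropped from vmx.h above hand-encode the VMX instructions as raw opcode bytes so the code assembles even with binutils too old to know the vmx mnemonics. As a sketch of the correspondence: ASM_VMX_VMREAD_RDX_RAX is VMREAD's "0F 78 /r" with ModRM 0xd0 (mod=11, reg=rdx, rm=rax), i.e. "vmread %rdx, %rax". A typical use, illustrative only:

static inline unsigned long vmcs_readl_sketch(unsigned long field)
{
	unsigned long value;

	/* ".byte 0x0f, 0x78, 0xd0" == vmread %rdx, %rax: read the
	 * VMCS field whose encoding is in rdx into rax.
	 */
	asm volatile(".byte 0x0f, 0x78, 0xd0"
		     : "=a" (value) : "d" (field) : "cc");
	return value;
}
]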
MSR_MTRRfix16K_80000] = data; - else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) - p[3 + msr - MSR_MTRRfix4K_C0000] = data; - else if (msr == MSR_IA32_CR_PAT) - vcpu->arch.pat = data; - else { /* Variable MTRRs */ - int idx, is_mtrr_mask; - u64 *pt; - - idx = (msr - 0x200) / 2; - is_mtrr_mask = msr - 0x200 - 2 * idx; - if (!is_mtrr_mask) - pt = - (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; - else - pt = - (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; - *pt = data; - } - - kvm_mmu_reset_context(vcpu); + vcpu->arch.mtrr[msr - 0x200] = data; return 0; } @@ -781,37 +747,10 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { - u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!msr_mtrr_valid(msr)) return 1; - if (msr == MSR_MTRRdefType) - *pdata = vcpu->arch.mtrr_state.def_type + - (vcpu->arch.mtrr_state.enabled << 10); - else if (msr == MSR_MTRRfix64K_00000) - *pdata = p[0]; - else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) - *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; - else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) - *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; - else if (msr == MSR_IA32_CR_PAT) - *pdata = vcpu->arch.pat; - else { /* Variable MTRRs */ - int idx, is_mtrr_mask; - u64 *pt; - - idx = (msr - 0x200) / 2; - is_mtrr_mask = msr - 0x200 - 2 * idx; - if (!is_mtrr_mask) - pt = - (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; - else - pt = - (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; - *pdata = *pt; - } - + *pdata = vcpu->arch.mtrr[msr - 0x200]; return 0; } @@ -964,6 +903,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQCHIP: case KVM_CAP_HLT: case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: + case KVM_CAP_USER_MEMORY: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: case KVM_CAP_CLOCKSOURCE: @@ -1248,7 +1188,6 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, int t, times = entry->eax & 0xff; entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; - entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; for (t = 1; t < times && *nent < maxnent; ++t) { do_cpuid_1_ent(&entry[t], function, 0); entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; @@ -1279,7 +1218,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until level_type is zero */ for (i = 1; *nent < maxnent; ++i) { - level_type = entry[i - 1].ecx & 0xff00; + level_type = entry[i - 1].ecx & 0xff; if (!level_type) break; do_cpuid_1_ent(&entry[i], function, i); @@ -1379,15 +1318,6 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return 0; } -static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) -{ - vcpu_load(vcpu); - kvm_inject_nmi(vcpu); - vcpu_put(vcpu); - - return 0; -} - static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, struct kvm_tpr_access_ctl *tac) { @@ -1447,13 +1377,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_NMI: { - r = kvm_vcpu_ioctl_nmi(vcpu); - if (r) - goto out; - r = 0; - break; - } case KVM_SET_CPUID: { struct kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; @@ -2045,7 +1968,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); if (ret < 0) return 0; - kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); + kvm_mmu_pte_write(vcpu, gpa, val, bytes); return 1; } @@ -2481,6 +2404,8 @@ int 
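[Aside: the mtrr_state code stripped out of set_msr_mtrr()/get_msr_mtrr() above packs the eleven fixed-range MTRR MSRs into one flat u64 array, which is where the offsets 0, "1 + ..." and "3 + ..." come from. A sketch of that mapping, assuming the architectural MSR numbers:

static int fixed_mtrr_index(u32 msr)
{
	if (msr == 0x250)			/* MSR_MTRRfix64K_00000: 0-512K   */
		return 0;
	if (msr == 0x258 || msr == 0x259)	/* MSR_MTRRfix16K_*: 512K-768K    */
		return 1 + (msr - 0x258);
	if (msr >= 0x268 && msr <= 0x26f)	/* MSR_MTRRfix4K_*:  768K-1M      */
		return 3 + (msr - 0x268);
	return -1;				/* not a fixed-range MTRR MSR     */
}

Each of the eleven u64s encodes eight sub-ranges, one memory type per byte, so the three groups together cover exactly the first megabyte.]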
kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, val = kvm_register_read(vcpu, VCPU_REGS_RAX); memcpy(vcpu->arch.pio_data, &val, 4); + kvm_x86_ops->skip_emulated_instruction(vcpu); + pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); @@ -2616,7 +2541,7 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_nonpresent_ptes(0ull, 0ull); kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, - PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); + PT_DIRTY_MASK, PT64_NX_MASK, 0); return 0; out: @@ -2804,7 +2729,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; /* when no next entry is found, the current entry[i] is reselected */ - for (j = i + 1; ; j = (j + 1) % nent) { + for (j = i + 1; j == i; j = (j + 1) % nent) { struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; if (ej->function == e->function) { ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; @@ -3048,7 +2973,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); - r = kvm_arch_vcpu_reset(vcpu); + r = kvm_x86_ops->vcpu_reset(vcpu); if (r) return r; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -3350,9 +3275,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, kvm_desct->padding = 0; } -static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, - u16 selector, - struct descriptor_table *dtable) +static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, + u16 selector, + struct descriptor_table *dtable) { if (selector & 1 << 2) { struct kvm_segment kvm_seg; @@ -3377,7 +3302,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct descriptor_table dtable; u16 index = selector >> 3; - get_segment_descriptor_dtable(vcpu, selector, &dtable); + get_segment_descritptor_dtable(vcpu, selector, &dtable); if (dtable.limit < index * 8 + 7) { kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); @@ -3396,7 +3321,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct descriptor_table dtable; u16 index = selector >> 3; - get_segment_descriptor_dtable(vcpu, selector, &dtable); + get_segment_descritptor_dtable(vcpu, selector, &dtable); if (dtable.limit < index * 8 + 7) return 1; @@ -3975,7 +3900,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) /* We do fxsave: this must be aligned. 
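[Aside: the descriptor-table helpers above lean on the x86 segment-selector layout: bit 2 (TI) selects the LDT over the GDT, and the upper 13 bits index 8-byte descriptors, which is where the "index * 8 + 7" limit check comes from. Spelled out as illustrative helpers, not from the patch:

static inline int selector_in_ldt(u16 selector)
{
	return selector & (1 << 2);	/* TI bit: 1 = LDT, 0 = GDT */
}

static inline unsigned int selector_index(u16 selector)
{
	return selector >> 3;		/* bits 1:0 are the RPL */
}

/* Descriptor N occupies bytes N*8 .. N*8+7, so its last byte must lie
 * within the table limit; "dtable.limit < index * 8 + 7" rejects any
 * selector whose descriptor would run past the end.
 */
]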
*/ BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); - vcpu->arch.mtrr_state.have_fixed = 1; vcpu_load(vcpu); r = kvm_arch_vcpu_reset(vcpu); if (r == 0) @@ -4001,9 +3925,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = false; - return kvm_x86_ops->vcpu_reset(vcpu); } @@ -4091,7 +4012,6 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&kvm->arch.oos_global_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ @@ -4128,8 +4048,8 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { - kvm_free_all_assigned_devices(kvm); kvm_iommu_unmap_guest(kvm); + kvm_free_all_assigned_devices(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); @@ -4207,8 +4127,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED - || vcpu->arch.nmi_pending; + || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; } static void vcpu_kick_intr(void *info) diff --git a/trunk/arch/x86/kvm/x86_emulate.c b/trunk/arch/x86/kvm/x86_emulate.c index d174db7a3370..ea051173b0da 100644 --- a/trunk/arch/x86/kvm/x86_emulate.c +++ b/trunk/arch/x86/kvm/x86_emulate.c @@ -58,7 +58,6 @@ #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ #define SrcImm (5<<4) /* Immediate operand. */ #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ -#define SrcOne (7<<4) /* Implied '1' */ #define SrcMask (7<<4) /* Generic ModRM decode. 
*/ #define ModRM (1<<7) @@ -71,23 +70,17 @@ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define GroupMask 0xff /* Group number stored in bits 0:7 */ -/* Source 2 operand type */ -#define Src2None (0<<29) -#define Src2CL (1<<29) -#define Src2ImmByte (2<<29) -#define Src2One (3<<29) -#define Src2Mask (7<<29) enum { Group1_80, Group1_81, Group1_82, Group1_83, Group1A, Group3_Byte, Group3, Group4, Group5, Group7, }; -static u32 opcode_table[256] = { +static u16 opcode_table[256] = { /* 0x00 - 0x07 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, + 0, 0, 0, 0, /* 0x08 - 0x0F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -202,7 +195,7 @@ static u32 opcode_table[256] = { ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, }; -static u32 twobyte_table[256] = { +static u16 twobyte_table[256] = { /* 0x00 - 0x0F */ 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, @@ -237,14 +230,9 @@ static u32 twobyte_table[256] = { /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, - DstMem | SrcReg | Src2ImmByte | ModRM, - DstMem | SrcReg | Src2CL | ModRM, 0, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, - DstMem | SrcReg | Src2ImmByte | ModRM, - DstMem | SrcReg | Src2CL | ModRM, - ModRM, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, /* 0xB0 - 0xB7 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, DstMem | SrcReg | ModRM | BitOp, @@ -265,7 +253,7 @@ static u32 twobyte_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -static u32 group_table[] = { +static u16 group_table[] = { [Group1_80*8] = ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, @@ -309,9 +297,9 @@ static u32 group_table[] = { SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, }; -static u32 group2_table[] = { +static u16 group2_table[] = { [Group7*8] = - SrcNone | ModRM, 0, 0, SrcNone | ModRM, + SrcNone | ModRM, 0, 0, 0, SrcNone | ModRM | DstMem | Mov, 0, SrcMem16 | ModRM | Mov, 0, }; @@ -371,48 +359,49 @@ static u32 group2_table[] = { "andl %"_msk",%"_LO32 _tmp"; " \ "orl %"_LO32 _tmp",%"_sav"; " -#ifdef CONFIG_X86_64 -#define ON64(x) x -#else -#define ON64(x) -#endif - -#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \ - do { \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op _suffix " %"_x"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : _y ((_src).val), "i" (EFLAGS_MASK)); \ - } while (0) - - /* Raw emulation: instruction has two explicit operands. 
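[Aside: the opcode tables in this hunk shrink from u32 back to u16 entries because the Src2* decode flags being removed live in bits 29-31, which cannot fit in 16 bits; everything that remains sits below bit 16. A sketch of the bit budget, using only values visible in the defines above:

/* With Src2 present, decode flags span bit 0 through bit 31,
 * forcing 32-bit table entries:
 */
#define ModRM		(1 << 7)	/* fits in u16             */
#define GroupDual	(1 << 15)	/* still fits in u16       */
#define Src2Mask	(7u << 29)	/* needs u32 table entries */

Dropping the Src2 flags lets every remaining entry fit in a u16 again, halving the tables.]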
*/ #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ - do { \ - unsigned long _tmp; \ - \ - switch ((_dst).bytes) { \ - case 2: \ - ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \ - break; \ - case 4: \ - ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \ - break; \ - case 8: \ - ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \ - break; \ - } \ + do { \ + unsigned long _tmp; \ + \ + switch ((_dst).bytes) { \ + case 2: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op"w %"_wx"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _wy ((_src).val), "i" (EFLAGS_MASK)); \ + break; \ + case 4: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op"l %"_lx"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _ly ((_src).val), "i" (EFLAGS_MASK)); \ + break; \ + case 8: \ + __emulate_2op_8byte(_op, _src, _dst, \ + _eflags, _qx, _qy); \ + break; \ + } \ } while (0) #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long _tmp; \ + unsigned long __tmp; \ switch ((_dst).bytes) { \ case 1: \ - ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op"b %"_bx"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (__tmp) \ + : _by ((_src).val), "i" (EFLAGS_MASK)); \ break; \ default: \ __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ @@ -436,69 +425,72 @@ static u32 group2_table[] = { __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ "w", "r", _LO32, "r", "", "r") -/* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ - do { \ - unsigned long _tmp; \ - _type _clv = (_cl).val; \ - _type _srcv = (_src).val; \ - _type _dstv = (_dst).val; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "2") \ - _op _suffix " %4,%1 \n" \ - _POST_EFLAGS("0", "5", "2") \ - : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ - ); \ - \ - (_cl).val = (unsigned long) _clv; \ - (_src).val = (unsigned long) _srcv; \ - (_dst).val = (unsigned long) _dstv; \ - } while (0) - -#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ - do { \ - switch ((_dst).bytes) { \ - case 2: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "w", unsigned short); \ - break; \ - case 4: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "l", unsigned int); \ - break; \ - case 8: \ - ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "q", unsigned long)); \ - break; \ - } \ - } while (0) - -#define __emulate_1op(_op, _dst, _eflags, _suffix) \ - do { \ - unsigned long _tmp; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op _suffix " %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "+m" ((_dst).val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - } while (0) - /* Instruction has only one explicit operand (no source operand). 
*/ #define emulate_1op(_op, _dst, _eflags) \ do { \ + unsigned long _tmp; \ + \ switch ((_dst).bytes) { \ - case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ - case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ - case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ - case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ + case 1: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op"b %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ + break; \ + case 2: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op"w %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ + break; \ + case 4: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op"l %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ + break; \ + case 8: \ + __emulate_1op_8byte(_op, _dst, _eflags); \ + break; \ } \ } while (0) +/* Emulate an instruction with quadword operands (x86/64 only). */ +#if defined(CONFIG_X86_64) +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ + do { \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op"q %"_qx"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : _qy ((_src).val), "i" (EFLAGS_MASK)); \ + } while (0) + +#define __emulate_1op_8byte(_op, _dst, _eflags) \ + do { \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op"q %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ + } while (0) + +#elif defined(__i386__) +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) +#define __emulate_1op_8byte(_op, _dst, _eflags) +#endif /* __i386__ */ + /* Fetch next part of the instruction being emulated. */ #define insn_fetch(_type, _size, _eip) \ ({ unsigned long _x; \ @@ -1049,33 +1041,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->src.bytes = 1; c->src.val = insn_fetch(s8, 1, c->eip); break; - case SrcOne: - c->src.bytes = 1; - c->src.val = 1; - break; - } - - /* - * Decode and fetch the second source operand: register, memory - * or immediate. - */ - switch (c->d & Src2Mask) { - case Src2None: - break; - case Src2CL: - c->src2.bytes = 1; - c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; - break; - case Src2ImmByte: - c->src2.type = OP_IMM; - c->src2.ptr = (unsigned long *)c->eip; - c->src2.bytes = 1; - c->src2.val = insn_fetch(u8, 1, c->eip); - break; - case Src2One: - c->src2.bytes = 1; - c->src2.val = 1; - break; } /* Decode and fetch the destination operand: register or memory. 
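[Aside: all of these emulate_*op macros follow one pattern: splice the guest's saved EFLAGS into the host flags, execute the real instruction on the operands, then harvest the resulting flags back into the guest value. Stripped of the macro plumbing and the _PRE_EFLAGS/_POST_EFLAGS bookkeeping, the core idea is roughly this -- a standalone sketch, not the kernel's exact code:

/* Run an 8-bit add on host silicon and capture the arithmetic flags
 * it produces, the way the emulator derives guest EFLAGS.
 */
static unsigned long emulate_addb_sketch(unsigned char *dst,
					 unsigned char src)
{
	unsigned long flags;

	__asm__ __volatile__(
		"addb	%2, %0\n\t"
		"pushf\n\t"
		"pop	%1"
		: "+m" (*dst), "=r" (flags)
		: "q" (src)
		: "cc");
	return flags;
}

The real macros additionally mask the harvested value with EFLAGS_MASK and merge it into the saved guest flags, since only the arithmetic bits are allowed to change.]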
*/ @@ -1135,33 +1100,20 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) c->regs[VCPU_REGS_RSP]); } -static int emulate_pop(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - struct decode_cache *c = &ctxt->decode; - int rc; - - rc = ops->read_emulated(register_address(c, ss_base(ctxt), - c->regs[VCPU_REGS_RSP]), - &c->src.val, c->src.bytes, ctxt->vcpu); - if (rc != 0) - return rc; - - register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); - return rc; -} - static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; int rc; - c->src.bytes = c->dst.bytes; - rc = emulate_pop(ctxt, ops); + rc = ops->read_std(register_address(c, ss_base(ctxt), + c->regs[VCPU_REGS_RSP]), + &c->dst.val, c->dst.bytes, ctxt->vcpu); if (rc != 0) return rc; - c->dst.val = c->src.val; + + register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); + return 0; } @@ -1463,15 +1415,24 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) emulate_1op("dec", c->dst, ctxt->eflags); break; case 0x50 ... 0x57: /* push reg */ - emulate_push(ctxt); + c->dst.type = OP_MEM; + c->dst.bytes = c->op_bytes; + c->dst.val = c->src.val; + register_address_increment(c, &c->regs[VCPU_REGS_RSP], + -c->op_bytes); + c->dst.ptr = (void *) register_address( + c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - c->src.bytes = c->op_bytes; - rc = emulate_pop(ctxt, ops); - if (rc != 0) + if ((rc = ops->read_std(register_address(c, ss_base(ctxt), + c->regs[VCPU_REGS_RSP]), c->dst.ptr, + c->op_bytes, ctxt->vcpu)) != 0) goto done; - c->dst.val = c->src.val; + + register_address_increment(c, &c->regs[VCPU_REGS_RSP], + c->op_bytes); + c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) @@ -1630,9 +1591,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) emulate_push(ctxt); break; case 0x9d: /* popf */ - c->dst.type = OP_REG; c->dst.ptr = (unsigned long *) &ctxt->eflags; - c->dst.bytes = c->op_bytes; goto pop_instruction; case 0xa0 ... 0xa1: /* mov */ c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; @@ -1730,9 +1689,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) emulate_grp2(ctxt); break; case 0xc3: /* ret */ - c->dst.type = OP_REG; c->dst.ptr = &c->eip; - c->dst.bytes = c->op_bytes; goto pop_instruction; case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ mov: @@ -1821,7 +1778,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->eip = saved_eip; goto cannot_emulate; } - break; + return 0; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; break; @@ -2042,20 +1999,12 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); break; - case 0xa4: /* shld imm8, r, r/m */ - case 0xa5: /* shld cl, r, r/m */ - emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); - break; case 0xab: bts: /* bts */ /* only subword offset */ c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); break; - case 0xac: /* shrd imm8, r, r/m */ - case 0xad: /* shrd cl, r, r/m */ - emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags); - break; case 0xae: /* clflush */ break; case 0xb0 ... 
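[Aside: among the removals in the hunks that follow are the SHLD/SHRD opcode entries (0F A4/A5 and 0F AC/AD) and their emulate_2op_cl() implementation; the Src2CL/Src2ImmByte decode flags existed to carry the third operand, the shift count. The instruction's effect in plain C -- a sketch, assuming 0 < count < width (hardware masks the count, and 16-bit overshift is undefined):

/* shld: shift dst left by count, refilling the vacated low bits from
 * the top of src; shrd is the mirror image in the other direction.
 */
static unsigned long shld_sketch(unsigned long dst, unsigned long src,
				 unsigned int count, unsigned int width)
{
	return (dst << count) | (src >> (width - count));
}
]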
0xb1: /* cmpxchg */ diff --git a/trunk/arch/x86/lguest/boot.c b/trunk/arch/x86/lguest/boot.c index a7ed208f81e3..50a779264bb1 100644 --- a/trunk/arch/x86/lguest/boot.c +++ b/trunk/arch/x86/lguest/boot.c @@ -738,7 +738,7 @@ static void lguest_time_init(void) /* We can't set cpumask in the initializer: damn C limitations! Set it * here and register our timer device. */ - lguest_clockevent.cpumask = cpumask_of(0); + lguest_clockevent.cpumask = cpumask_of_cpu(0); clockevents_register_device(&lguest_clockevent); /* Finally, we unblock the timer interrupt. */ diff --git a/trunk/arch/x86/mach-default/setup.c b/trunk/arch/x86/mach-default/setup.c index df167f265622..37b9ae4d44c5 100644 --- a/trunk/arch/x86/mach-default/setup.c +++ b/trunk/arch/x86/mach-default/setup.c @@ -133,28 +133,29 @@ void __init time_init_hook(void) **/ void mca_nmi_hook(void) { - /* - * If I recall correctly, there's a whole bunch of other things that + /* If I recall correctly, there's a whole bunch of other things that * we can do to check for NMI problems, but that's all I know about * at the moment. */ - pr_warning("NMI generated from unknown source!\n"); + + printk("NMI generated from unknown source!\n"); } #endif static __init int no_ipi_broadcast(char *str) { get_option(&str, &no_broadcast); - pr_info("Using %s mode\n", - no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); + printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : + "IPI Broadcast"); return 1; } + __setup("no_ipi_broadcast=", no_ipi_broadcast); static int __init print_ipi_mode(void) { - pr_info("Using IPI %s mode\n", - no_broadcast ? "No-Shortcut" : "Shortcut"); + printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : + "Shortcut"); return 0; } diff --git a/trunk/arch/x86/mach-generic/bigsmp.c b/trunk/arch/x86/mach-generic/bigsmp.c index bc4c7840b2a8..3624a364b7f3 100644 --- a/trunk/arch/x86/mach-generic/bigsmp.c +++ b/trunk/arch/x86/mach-generic/bigsmp.c @@ -42,10 +42,9 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + return cpumask_of_cpu(cpu); } static int probe_bigsmp(void) diff --git a/trunk/arch/x86/mach-generic/es7000.c b/trunk/arch/x86/mach-generic/es7000.c index 4ba5ccaa1584..7b4e6d0d1690 100644 --- a/trunk/arch/x86/mach-generic/es7000.c +++ b/trunk/arch/x86/mach-generic/es7000.c @@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -97,7 +97,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. 
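[Aside: the bigsmp and es7000 hunks above, and the numaq/summit ones that follow, all make the same signature change: the newer kernel fills a caller-supplied mask through a pointer, while the code restored here returns a cpumask_t by value. With NR_CPUS in the thousands a cpumask_t runs to hundreds of bytes, which is the stack copy the out-parameter form was avoiding. Side by side as a sketch, using the era's APIs (the second name is illustrative):

/* Restored form: the whole mask is returned -- and copied -- by value. */
static cpumask_t vector_allocation_domain(int cpu)
{
	return cpumask_of_cpu(cpu);
}

/* Newer form: the caller owns the storage; nothing large is copied. */
static void vector_allocation_domain_ptr(int cpu, cpumask_t *retmask)
{
	cpus_clear(*retmask);
	cpu_set(cpu, *retmask);
}
]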
*/ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/trunk/arch/x86/mach-generic/numaq.c b/trunk/arch/x86/mach-generic/numaq.c index 511d7941364f..71a309b122e6 100644 --- a/trunk/arch/x86/mach-generic/numaq.c +++ b/trunk/arch/x86/mach-generic/numaq.c @@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -48,7 +48,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/trunk/arch/x86/mach-generic/summit.c b/trunk/arch/x86/mach-generic/summit.c index 2821ffc188b5..2c6d234e0009 100644 --- a/trunk/arch/x86/mach-generic/summit.c +++ b/trunk/arch/x86/mach-generic/summit.c @@ -24,7 +24,7 @@ static int probe_summit(void) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -34,7 +34,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/trunk/arch/x86/mach-voyager/voyager_smp.c b/trunk/arch/x86/mach-voyager/voyager_smp.c index a5bc05492b1e..52145007bd7e 100644 --- a/trunk/arch/x86/mach-voyager/voyager_smp.c +++ b/trunk/arch/x86/mach-voyager/voyager_smp.c @@ -63,6 +63,11 @@ static int voyager_extended_cpus = 1; /* Used for the invalidate map that's also checked in the spinlock */ static volatile unsigned long smp_invalidate_needed; +/* Bitmask of currently online CPUs - used by setup.c for + /proc/cpuinfo, visible externally but still physical */ +cpumask_t cpu_online_map = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_online_map); + /* Bitmask of CPUs present in the system - exported by i386_syms.c, used * by scheduler but indexed physically */ cpumask_t phys_cpu_present_map = CPU_MASK_NONE; @@ -213,6 +218,8 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* This is for the new dynamic CPU boot code */ cpumask_t cpu_callin_map = CPU_MASK_NONE; cpumask_t cpu_callout_map = CPU_MASK_NONE; +cpumask_t cpu_possible_map = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_possible_map); /* The per processor IRQ masks (these are usually kept in sync) */ static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; @@ -672,7 +679,7 @@ void __init smp_boot_cpus(void) /* loop over all the extended VIC CPUs and boot them. 
The * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) continue; do_boot_cpu(i); diff --git a/trunk/arch/x86/mm/numa_64.c b/trunk/arch/x86/mm/numa_64.c index 71a14f89f89e..cebcbf152d46 100644 --- a/trunk/arch/x86/mm/numa_64.c +++ b/trunk/arch/x86/mm/numa_64.c @@ -278,7 +278,7 @@ void __init numa_init_array(void) int rr, i; rr = first_node(node_online_map); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); @@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) memnodemap[0] = 0; node_set_online(0); node_set(0, node_possible_map); - for (i = 0; i < nr_cpu_ids; i++) + for (i = 0; i < NR_CPUS; i++) numa_set_node(i, 0); e820_register_active_regions(0, start_pfn, last_pfn); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); diff --git a/trunk/arch/x86/mm/srat_64.c b/trunk/arch/x86/mm/srat_64.c index 09737c8af074..51c0a2fc14fe 100644 --- a/trunk/arch/x86/mm/srat_64.c +++ b/trunk/arch/x86/mm/srat_64.c @@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (!node_online(i)) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { int node = early_cpu_to_node(i); if (node == NUMA_NO_NODE) diff --git a/trunk/arch/x86/pci/acpi.c b/trunk/arch/x86/pci/acpi.c index 9e5752fe4d15..1d88d2b39771 100644 --- a/trunk/arch/x86/pci/acpi.c +++ b/trunk/arch/x86/pci/acpi.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include "pci.h" struct pci_root_info { char *name; diff --git a/trunk/arch/x86/pci/amd_bus.c b/trunk/arch/x86/pci/amd_bus.c index 9bb09823b362..22e057665e55 100644 --- a/trunk/arch/x86/pci/amd_bus.c +++ b/trunk/arch/x86/pci/amd_bus.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include "pci.h" #ifdef CONFIG_X86_64 #include diff --git a/trunk/arch/x86/pci/common.c b/trunk/arch/x86/pci/common.c index 62ddb73e09ed..bb1a01f089e2 100644 --- a/trunk/arch/x86/pci/common.c +++ b/trunk/arch/x86/pci/common.c @@ -14,7 +14,8 @@ #include #include #include -#include + +#include "pci.h" unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; diff --git a/trunk/arch/x86/pci/direct.c b/trunk/arch/x86/pci/direct.c index bd13c3e4c6db..9a5af6c8fbe9 100644 --- a/trunk/arch/x86/pci/direct.c +++ b/trunk/arch/x86/pci/direct.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include "pci.h" /* * Functions for accessing PCI base (first 256 bytes) and extended diff --git a/trunk/arch/x86/pci/early.c b/trunk/arch/x86/pci/early.c index f6adf2c6d751..86631ccbc25a 100644 --- a/trunk/arch/x86/pci/early.c +++ b/trunk/arch/x86/pci/early.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include "pci.h" /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. 
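[Aside: the voyager, numa_64 and srat_64 loops above (and the xen ones later in the patch) all move the bound back from nr_cpu_ids to NR_CPUS. NR_CPUS is the compile-time array size; nr_cpu_ids is one past the highest CPU id that can actually exist on the booted machine, so bounding scans by it skips the dead tail of per-cpu arrays. A sketch of the difference:

/* On a distro kernel NR_CPUS might be 4096 while nr_cpu_ids is 8 on
 * the machine at hand; the nr_cpu_ids bound does 8 iterations where
 * the NR_CPUS bound does 4096.
 */
static int count_possible_cpus_sketch(void)
{
	int i, n = 0;

	for (i = 0; i < nr_cpu_ids; i++)	/* vs. i < NR_CPUS */
		if (cpu_possible(i))
			n++;
	return n;
}
]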
*/ diff --git a/trunk/arch/x86/pci/fixup.c b/trunk/arch/x86/pci/fixup.c index 7d388d5cf548..2051dc96b8e9 100644 --- a/trunk/arch/x86/pci/fixup.c +++ b/trunk/arch/x86/pci/fixup.c @@ -6,7 +6,8 @@ #include #include #include -#include +#include "pci.h" + static void __devinit pci_fixup_i450nx(struct pci_dev *d) { diff --git a/trunk/arch/x86/pci/i386.c b/trunk/arch/x86/pci/i386.c index e51bf2cda4b0..844df0cbbd3e 100644 --- a/trunk/arch/x86/pci/i386.c +++ b/trunk/arch/x86/pci/i386.c @@ -34,8 +34,8 @@ #include #include -#include +#include "pci.h" static int skip_isa_ioresource_align(struct pci_dev *dev) { diff --git a/trunk/arch/x86/pci/init.c b/trunk/arch/x86/pci/init.c index bec3b048e72b..d6c950f81858 100644 --- a/trunk/arch/x86/pci/init.c +++ b/trunk/arch/x86/pci/init.c @@ -1,6 +1,6 @@ #include #include -#include +#include "pci.h" /* arch_initcall has too random ordering, so call the initializers in the right sequence from here. */ diff --git a/trunk/arch/x86/pci/irq.c b/trunk/arch/x86/pci/irq.c index 373b9afe6d44..bf69dbe08bff 100644 --- a/trunk/arch/x86/pci/irq.c +++ b/trunk/arch/x86/pci/irq.c @@ -16,7 +16,8 @@ #include #include #include -#include + +#include "pci.h" #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) #define PIRQ_VERSION 0x0100 diff --git a/trunk/arch/x86/pci/legacy.c b/trunk/arch/x86/pci/legacy.c index f1065b129e9c..b722dd481b39 100644 --- a/trunk/arch/x86/pci/legacy.c +++ b/trunk/arch/x86/pci/legacy.c @@ -3,7 +3,7 @@ */ #include #include -#include +#include "pci.h" /* * Discover remaining PCI buses in case there are peer host bridges. diff --git a/trunk/arch/x86/pci/mmconfig-shared.c b/trunk/arch/x86/pci/mmconfig-shared.c index 89bf9242c80a..654a2234f8f3 100644 --- a/trunk/arch/x86/pci/mmconfig-shared.c +++ b/trunk/arch/x86/pci/mmconfig-shared.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "pci.h" /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) diff --git a/trunk/arch/x86/pci/mmconfig_32.c b/trunk/arch/x86/pci/mmconfig_32.c index 8b2d561046a3..f3c761dce695 100644 --- a/trunk/arch/x86/pci/mmconfig_32.c +++ b/trunk/arch/x86/pci/mmconfig_32.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include "pci.h" /* Assume systems with more busses have correct MCFG */ #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) diff --git a/trunk/arch/x86/pci/mmconfig_64.c b/trunk/arch/x86/pci/mmconfig_64.c index 30007ffc8e11..a1994163c99d 100644 --- a/trunk/arch/x86/pci/mmconfig_64.c +++ b/trunk/arch/x86/pci/mmconfig_64.c @@ -10,7 +10,8 @@ #include #include #include -#include + +#include "pci.h" /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { diff --git a/trunk/arch/x86/pci/numaq_32.c b/trunk/arch/x86/pci/numaq_32.c index 2089354968a2..1177845d3186 100644 --- a/trunk/arch/x86/pci/numaq_32.c +++ b/trunk/arch/x86/pci/numaq_32.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include "pci.h" #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. 
*/ diff --git a/trunk/arch/x86/pci/olpc.c b/trunk/arch/x86/pci/olpc.c index b889d824f7c6..e11e9e803d5f 100644 --- a/trunk/arch/x86/pci/olpc.c +++ b/trunk/arch/x86/pci/olpc.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include "pci.h" /* * In the tables below, the first two line (8 longwords) are the diff --git a/trunk/arch/x86/pci/pcbios.c b/trunk/arch/x86/pci/pcbios.c index b82cae970dfd..37472fc6f729 100644 --- a/trunk/arch/x86/pci/pcbios.c +++ b/trunk/arch/x86/pci/pcbios.c @@ -6,8 +6,9 @@ #include #include #include -#include -#include +#include "pci.h" +#include "pci-functions.h" + /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff --git a/trunk/arch/x86/include/asm/pci_x86.h b/trunk/arch/x86/pci/pci.h similarity index 88% rename from trunk/arch/x86/include/asm/pci_x86.h rename to trunk/arch/x86/pci/pci.h index e60fd3e14bdf..1959018aac02 100644 --- a/trunk/arch/x86/include/asm/pci_x86.h +++ b/trunk/arch/x86/pci/pci.h @@ -57,8 +57,7 @@ extern struct pci_ops pci_root_ops; struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { - u8 link; /* IRQ line ID, chipset dependent, - 0 = not routed */ + u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ u16 bitmap; /* Available IRQs */ } __attribute__((packed)) irq[4]; u8 slot; /* Slot number, 0=onboard */ @@ -70,13 +69,11 @@ struct irq_routing_table { u16 version; /* PIRQ_VERSION */ u16 size; /* Table size in bytes */ u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to - PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of - interrupt router */ + u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ u32 miniport_data; /* Crap */ u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give 0 */ + u8 checksum; /* Modulo 256 checksum must give zero */ struct irq_info slots[0]; } __attribute__((packed)); @@ -151,15 +148,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos) static inline void mmio_config_writeb(void __iomem *pos, u8 val) { - asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); + asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); } static inline void mmio_config_writew(void __iomem *pos, u16 val) { - asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); + asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); } static inline void mmio_config_writel(void __iomem *pos, u32 val) { - asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); + asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); } diff --git a/trunk/arch/x86/pci/visws.c b/trunk/arch/x86/pci/visws.c index 16d0c0eb0d19..42f4cb19faca 100644 --- a/trunk/arch/x86/pci/visws.c +++ b/trunk/arch/x86/pci/visws.c @@ -9,10 +9,11 @@ #include #include -#include #include #include +#include "pci.h" + static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } static void pci_visws_disable_irq(struct pci_dev *dev) { } diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c index 503c240e26c7..773d68d3e912 100644 --- a/trunk/arch/x86/xen/mmu.c +++ b/trunk/arch/x86/xen/mmu.c @@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info) static void xen_drop_mm_ref(struct mm_struct *mm) { - cpumask_var_t mask; + cpumask_t mask; unsigned cpu; if (current->active_mm == mm) { @@ -1094,16 +1094,7 @@ static void 
xen_drop_mm_ref(struct mm_struct *mm) } /* Get the "official" set of cpus referring to our pagetable. */ - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) - && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) - continue; - smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); - } - return; - } - cpumask_copy(mask, &mm->cpu_vm_mask); + mask = mm->cpu_vm_mask; /* It's possible that a vcpu may have a stale reference to our cr3, because its in lazy mode, and it hasn't yet flushed @@ -1112,12 +1103,11 @@ static void xen_drop_mm_ref(struct mm_struct *mm) if needed. */ for_each_online_cpu(cpu) { if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) - cpumask_set_cpu(cpu, mask); + cpu_set(cpu, mask); } - if (!cpumask_empty(mask)) - smp_call_function_many(mask, drop_other_mm_ref, mm, 1); - free_cpumask_var(mask); + if (!cpus_empty(mask)) + smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); } #else static void xen_drop_mm_ref(struct mm_struct *mm) diff --git a/trunk/arch/x86/xen/smp.c b/trunk/arch/x86/xen/smp.c index c44e2069c7c7..acd9b6705e02 100644 --- a/trunk/arch/x86/xen/smp.c +++ b/trunk/arch/x86/xen/smp.c @@ -33,7 +33,7 @@ #include "xen-ops.h" #include "mmu.h" -cpumask_var_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, callfunc_irq); @@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) { int i, rc; - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; @@ -192,14 +192,11 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) if (xen_smp_intr_init(0)) BUG(); - if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) - panic("could not allocate xen_cpu_initialized_map\n"); - - cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); + xen_cpu_initialized_map = cpumask_of_cpu(0); /* Restrict the possible_map according to max_cpus. */ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) + for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) continue; cpu_clear(cpu, cpu_possible_map); } @@ -224,7 +221,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct vcpu_guest_context *ctxt; struct desc_struct *gdt; - if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) + if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); @@ -411,23 +408,24 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(const struct cpumask *mask, - enum ipi_vector vector) +static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) { unsigned cpu; - for_each_cpu_and(cpu, mask, cpu_online_mask) + cpus_and(mask, mask, cpu_online_map); + + for_each_cpu_mask_nr(cpu, mask) xen_send_IPI_one(cpu, vector); } -static void xen_smp_send_call_function_ipi(const struct cpumask *mask) +static void xen_smp_send_call_function_ipi(cpumask_t mask) { int cpu; xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. 
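[Aside: the xen_drop_mm_ref() hunk above trades cpumask_var_t, which under CONFIG_CPUMASK_OFFSTACK is heap-allocated and can fail, for a plain on-stack cpumask_t. The allocation discipline being removed looks roughly like this in general -- a sketch, with the IPI comment standing in for the real body:

static void drop_refs_sketch(struct mm_struct *mm)
{
	cpumask_var_t mask;

	/* Off-stack builds kmalloc() nr_cpu_ids bits here and can fail
	 * under GFP_ATOMIC; on-stack builds make this a no-op that
	 * always succeeds. Either way callers need a fallback path.
	 */
	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
		/* degrade: walk the online CPUs one by one instead */
		return;
	}

	cpumask_copy(mask, &mm->cpu_vm_mask);
	/* ... send IPIs to the CPUs left in mask ... */
	free_cpumask_var(mask);
}
]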
*/ - for_each_cpu(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { if (xen_vcpu_stolen(cpu)) { HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; @@ -437,8 +435,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(cpumask_of(cpu), - XEN_CALL_FUNCTION_SINGLE_VECTOR); + xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); } static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) diff --git a/trunk/arch/x86/xen/suspend.c b/trunk/arch/x86/xen/suspend.c index 212ffe012b76..2a234db5949b 100644 --- a/trunk/arch/x86/xen/suspend.c +++ b/trunk/arch/x86/xen/suspend.c @@ -35,8 +35,7 @@ void xen_post_suspend(int suspend_cancelled) pfn_to_mfn(xen_start_info->console.domU.mfn); } else { #ifdef CONFIG_SMP - BUG_ON(xen_cpu_initialized_map == NULL); - cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); + xen_cpu_initialized_map = cpu_online_map; #endif xen_vcpu_restore(); } diff --git a/trunk/arch/x86/xen/time.c b/trunk/arch/x86/xen/time.c index 65d75a6be0ba..c9f7cda48ed7 100644 --- a/trunk/arch/x86/xen/time.c +++ b/trunk/arch/x86/xen/time.c @@ -437,7 +437,7 @@ void xen_setup_timer(int cpu) evt = &per_cpu(xen_clock_events, cpu); memcpy(evt, xen_clockevent, sizeof(*evt)); - evt->cpumask = cpumask_of(cpu); + evt->cpumask = cpumask_of_cpu(cpu); evt->irq = irq; setup_runstate_info(cpu); diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h index c1f8faf0a2c5..9e1afae8461f 100644 --- a/trunk/arch/x86/xen/xen-ops.h +++ b/trunk/arch/x86/xen/xen-ops.h @@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void); __cpuinit void xen_init_lock_cpu(int cpu); void xen_uninit_lock_cpu(int cpu); -extern cpumask_var_t xen_cpu_initialized_map; +extern cpumask_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} #endif diff --git a/trunk/drivers/base/cpu.c b/trunk/drivers/base/cpu.c index 4259072f5bd0..64f5d54f7edc 100644 --- a/trunk/drivers/base/cpu.c +++ b/trunk/drivers/base/cpu.c @@ -109,7 +109,7 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); */ static ssize_t print_cpus_map(char *buf, cpumask_t *map) { - int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map); + int n = cpulist_scnprintf(buf, PAGE_SIZE-2, *map); buf[n++] = '\n'; buf[n] = '\0'; diff --git a/trunk/drivers/base/node.c b/trunk/drivers/base/node.c index 91636cd8b6c9..f5207090885a 100644 --- a/trunk/drivers/base/node.c +++ b/trunk/drivers/base/node.c @@ -30,8 +30,8 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf) BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); len = type? - cpulist_scnprintf(buf, PAGE_SIZE-2, mask) : - cpumask_scnprintf(buf, PAGE_SIZE-2, mask); + cpulist_scnprintf(buf, PAGE_SIZE-2, *mask): + cpumask_scnprintf(buf, PAGE_SIZE-2, *mask); buf[len++] = '\n'; buf[len] = '\0'; return len; diff --git a/trunk/drivers/base/topology.c b/trunk/drivers/base/topology.c index a8bc1cbcfa7c..199cd97e32e6 100644 --- a/trunk/drivers/base/topology.c +++ b/trunk/drivers/base/topology.c @@ -49,8 +49,8 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) if (len > 1) { n = type? 
- cpulist_scnprintf(buf, len-2, mask) : - cpumask_scnprintf(buf, len-2, mask); + cpulist_scnprintf(buf, len-2, *mask): + cpumask_scnprintf(buf, len-2, *mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/trunk/drivers/clocksource/tcb_clksrc.c b/trunk/drivers/clocksource/tcb_clksrc.c index 254f1064d973..f450588e5858 100644 --- a/trunk/drivers/clocksource/tcb_clksrc.c +++ b/trunk/drivers/clocksource/tcb_clksrc.c @@ -154,6 +154,7 @@ static struct tc_clkevt_device clkevt = { .shift = 32, /* Should be lower than at91rm9200's system timer */ .rating = 125, + .cpumask = CPU_MASK_CPU0, .set_next_event = tc_next_event, .set_mode = tc_mode, }, @@ -194,7 +195,6 @@ static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) clkevt.clkevt.max_delta_ns = clockevent_delta2ns(0xffff, &clkevt.clkevt); clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1; - clkevt.clkevt.cpumask = cpumask_of(0); setup_irq(irq, &tc_irqaction); diff --git a/trunk/drivers/lguest/interrupts_and_traps.c b/trunk/drivers/lguest/interrupts_and_traps.c index 415fab0125ac..a1039068f95c 100644 --- a/trunk/drivers/lguest/interrupts_and_traps.c +++ b/trunk/drivers/lguest/interrupts_and_traps.c @@ -222,16 +222,11 @@ bool check_syscall_vector(struct lguest *lg) int init_interrupts(void) { /* If they want some strange system call vector, reserve it now */ - if (syscall_vector != SYSCALL_VECTOR) { - if (test_bit(syscall_vector, used_vectors) || - vector_used_by_percpu_irq(syscall_vector)) { - printk(KERN_ERR "lg: couldn't reserve syscall %u\n", - syscall_vector); - return -EBUSY; - } - set_bit(syscall_vector, used_vectors); + if (syscall_vector != SYSCALL_VECTOR + && test_and_set_bit(syscall_vector, used_vectors)) { + printk("lg: couldn't reserve syscall %u\n", syscall_vector); + return -EBUSY; } - return 0; } diff --git a/trunk/drivers/parisc/iosapic.c b/trunk/drivers/parisc/iosapic.c index 9dedbbd218c3..7beffcab2745 100644 --- a/trunk/drivers/parisc/iosapic.c +++ b/trunk/drivers/parisc/iosapic.c @@ -704,17 +704,16 @@ static unsigned int iosapic_startup_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void iosapic_set_affinity_irq(unsigned int irq, - const struct cpumask *dest) +static void iosapic_set_affinity_irq(unsigned int irq, cpumask_t dest) { struct vector_info *vi = iosapic_get_vector(irq); u32 d0, d1, dummy_d0; unsigned long flags; - if (cpu_check_affinity(irq, dest)) + if (cpu_check_affinity(irq, &dest)) return; - vi->txn_addr = txn_affinity_addr(irq, cpumask_first(dest)); + vi->txn_addr = txn_affinity_addr(irq, first_cpu(dest)); spin_lock_irqsave(&iosapic_lock, flags); /* d1 contains the destination CPU, so only want to set that diff --git a/trunk/drivers/pci/hotplug/cpqphp_core.c b/trunk/drivers/pci/hotplug/cpqphp_core.c index c2e1bcbb28a7..8514c3a1746a 100644 --- a/trunk/drivers/pci/hotplug/cpqphp_core.c +++ b/trunk/drivers/pci/hotplug/cpqphp_core.c @@ -45,7 +45,7 @@ #include "cpqphp.h" #include "cpqphp_nvram.h" -#include +#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ /* Global variables */ diff --git a/trunk/drivers/pci/hotplug/cpqphp_pci.c b/trunk/drivers/pci/hotplug/cpqphp_pci.c index df146be9d2e9..09021930589f 100644 --- a/trunk/drivers/pci/hotplug/cpqphp_pci.c +++ b/trunk/drivers/pci/hotplug/cpqphp_pci.c @@ -37,7 +37,7 @@ #include "../pci.h" #include "cpqphp.h" #include "cpqphp_nvram.h" -#include +#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... 
*/ u8 cpqhp_nic_irq; diff --git a/trunk/drivers/pci/hotplug/ibmphp_core.c b/trunk/drivers/pci/hotplug/ibmphp_core.c index dd18f857dfb0..633e743442ac 100644 --- a/trunk/drivers/pci/hotplug/ibmphp_core.c +++ b/trunk/drivers/pci/hotplug/ibmphp_core.c @@ -35,7 +35,7 @@ #include #include #include "../pci.h" -#include /* for struct irq_routing_table */ +#include "../../../arch/x86/pci/pci.h" /* for struct irq_routing_table */ #include "ibmphp.h" #define attn_on(sl) ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON) diff --git a/trunk/drivers/pci/pci-sysfs.c b/trunk/drivers/pci/pci-sysfs.c index c88485860a0a..5d72866897a8 100644 --- a/trunk/drivers/pci/pci-sysfs.c +++ b/trunk/drivers/pci/pci-sysfs.c @@ -74,7 +74,7 @@ static ssize_t local_cpus_show(struct device *dev, int len; mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - len = cpumask_scnprintf(buf, PAGE_SIZE-2, &mask); + len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask); buf[len++] = '\n'; buf[len] = '\0'; return len; @@ -88,7 +88,7 @@ static ssize_t local_cpulist_show(struct device *dev, int len; mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - len = cpulist_scnprintf(buf, PAGE_SIZE-2, &mask); + len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask); buf[len++] = '\n'; buf[len] = '\0'; return len; diff --git a/trunk/drivers/pci/probe.c b/trunk/drivers/pci/probe.c index 5b3f5937ecf5..003a9b3c293f 100644 --- a/trunk/drivers/pci/probe.c +++ b/trunk/drivers/pci/probe.c @@ -55,8 +55,8 @@ static ssize_t pci_bus_show_cpuaffinity(struct device *dev, cpumask = pcibus_to_cpumask(to_pci_bus(dev)); ret = type? - cpulist_scnprintf(buf, PAGE_SIZE-2, &cpumask) : - cpumask_scnprintf(buf, PAGE_SIZE-2, &cpumask); + cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask): + cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask); buf[ret++] = '\n'; buf[ret] = '\0'; return ret; diff --git a/trunk/drivers/xen/events.c b/trunk/drivers/xen/events.c index eb0dfdeaa949..e26733a9df21 100644 --- a/trunk/drivers/xen/events.c +++ b/trunk/drivers/xen/events.c @@ -585,7 +585,7 @@ void rebind_evtchn_irq(int evtchn, int irq) spin_unlock(&irq_mapping_update_lock); /* new event channels are always bound to cpu 0 */ - irq_set_affinity(irq, cpumask_of(0)); + irq_set_affinity(irq, cpumask_of_cpu(0)); /* Unmask the event channel. 
*/ enable_irq(irq); @@ -614,9 +614,9 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) } -static void set_affinity_irq(unsigned irq, const struct cpumask *dest) +static void set_affinity_irq(unsigned irq, cpumask_t dest) { - unsigned tcpu = cpumask_first(dest); + unsigned tcpu = first_cpu(dest); rebind_irq_to_cpu(irq, tcpu); } diff --git a/trunk/fs/anon_inodes.c b/trunk/fs/anon_inodes.c index 3bbdb9d02376..c16d9be1b017 100644 --- a/trunk/fs/anon_inodes.c +++ b/trunk/fs/anon_inodes.c @@ -79,12 +79,9 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, if (IS_ERR(anon_inode_inode)) return -ENODEV; - if (fops->owner && !try_module_get(fops->owner)) - return -ENOENT; - error = get_unused_fd_flags(flags); if (error < 0) - goto err_module; + return error; fd = error; /* @@ -131,8 +128,6 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, dput(dentry); err_put_unused_fd: put_unused_fd(fd); -err_module: - module_put(fops->owner); return error; } EXPORT_SYMBOL_GPL(anon_inode_getfd); diff --git a/trunk/include/asm-generic/topology.h b/trunk/include/asm-generic/topology.h index 0e9e2bc0ee96..54bbf6e04ee8 100644 --- a/trunk/include/asm-generic/topology.h +++ b/trunk/include/asm-generic/topology.h @@ -40,9 +40,6 @@ #ifndef node_to_cpumask #define node_to_cpumask(node) ((void)node, cpu_online_map) #endif -#ifndef cpumask_of_node -#define cpumask_of_node(node) ((void)node, cpu_online_mask) -#endif #ifndef node_to_first_cpu #define node_to_first_cpu(node) ((void)(node),0) #endif @@ -57,18 +54,9 @@ ) #endif -#ifndef cpumask_of_pcibus -#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ - cpu_all_mask : \ - cpumask_of_node(pcibus_to_node(bus))) -#endif - #endif /* CONFIG_NUMA */ -/* - * returns pointer to cpumask for specified node - * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" - */ +/* returns pointer to cpumask for specified node */ #ifndef node_to_cpumask_ptr #define node_to_cpumask_ptr(v, node) \ diff --git a/trunk/include/asm-m32r/smp.h b/trunk/include/asm-m32r/smp.h index b96a6d2ffbc3..c5dd66916692 100644 --- a/trunk/include/asm-m32r/smp.h +++ b/trunk/include/asm-m32r/smp.h @@ -63,6 +63,8 @@ extern volatile int cpu_2_physid[NR_CPUS]; #define raw_smp_processor_id() (current_thread_info()->cpu) extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_possible_map; +extern cpumask_t cpu_present_map; static __inline__ int hard_smp_processor_id(void) { diff --git a/trunk/include/linux/clockchips.h b/trunk/include/linux/clockchips.h index cea153697ec7..ed3a5d473e52 100644 --- a/trunk/include/linux/clockchips.h +++ b/trunk/include/linux/clockchips.h @@ -82,13 +82,13 @@ struct clock_event_device { int shift; int rating; int irq; - const struct cpumask *cpumask; + cpumask_t cpumask; int (*set_next_event)(unsigned long evt, struct clock_event_device *); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); void (*event_handler)(struct clock_event_device *); - void (*broadcast)(const struct cpumask *mask); + void (*broadcast)(cpumask_t mask); struct list_head list; enum clock_event_mode mode; ktime_t next_event; diff --git a/trunk/include/linux/compiler-gcc3.h b/trunk/include/linux/compiler-gcc3.h index 8005effc04f1..2befe6513ce4 100644 --- a/trunk/include/linux/compiler-gcc3.h +++ b/trunk/include/linux/compiler-gcc3.h @@ -2,10 +2,6 @@ #error "Please don't include directly, include instead." #endif -#if __GNUC_MINOR__ < 2 -# error Sorry, your compiler is too old - please upgrade it. 
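[Aside: the clockchips.h hunk above is the type change driving the earlier lguest, tcb_clksrc and xen/time.c edits: clock_event_device.cpumask reverts from a const struct cpumask pointer to a by-value cpumask_t. At a registration site the two variants look like this (sketch; each line only compiles against its own version of the struct):

/* restored by-value form: the whole cpumask_t is copied in */
evt->cpumask = cpumask_of_cpu(cpu);

/* newer pointer form: points at a static per-cpu mask, copies nothing */
/* evt->cpumask = cpumask_of(cpu); */
]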
-#endif - #if __GNUC_MINOR__ >= 3 # define __used __attribute__((__used__)) #else diff --git a/trunk/include/linux/cpumask.h b/trunk/include/linux/cpumask.h index d4bf52603e6b..21e1dd43e52a 100644 --- a/trunk/include/linux/cpumask.h +++ b/trunk/include/linux/cpumask.h @@ -339,6 +339,36 @@ extern cpumask_t cpu_mask_all; #endif #define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) +#define cpumask_scnprintf(buf, len, src) \ + __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpumask_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nbits); +} + +#define cpumask_parse_user(ubuf, ulen, dst) \ + __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) +static inline int __cpumask_parse_user(const char __user *buf, int len, + cpumask_t *dstp, int nbits) +{ + return bitmap_parse_user(buf, len, dstp->bits, nbits); +} + +#define cpulist_scnprintf(buf, len, src) \ + __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpulist_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); +} + +#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) +static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits) +{ + return bitmap_parselist(buf, dstp->bits, nbits); +} + #define cpu_remap(oldbit, old, new) \ __cpu_remap((oldbit), &(old), &(new), NR_CPUS) static inline int __cpu_remap(int oldbit, @@ -510,6 +540,9 @@ extern cpumask_t cpu_active_map; [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } +/* This produces more efficient code. */ +#define nr_cpumask_bits NR_CPUS + #else /* NR_CPUS > BITS_PER_LONG */ #define CPU_BITS_ALL \ @@ -517,15 +550,9 @@ extern cpumask_t cpu_active_map; [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } -#endif /* NR_CPUS > BITS_PER_LONG */ -#ifdef CONFIG_CPUMASK_OFFSTACK -/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, - * not all bits may be allocated. */ #define nr_cpumask_bits nr_cpu_ids -#else -#define nr_cpumask_bits NR_CPUS -#endif +#endif /* NR_CPUS > BITS_PER_LONG */ /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) @@ -918,63 +945,6 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) -/** - * cpumask_scnprintf - print a cpumask into a string as comma-separated hex - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. Otherwise returns the length of the - * (nul-terminated) @buf string. - */ -static inline int cpumask_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits); -} - -/** - * cpumask_parse_user - extract a cpumask from a user string - * @buf: the buffer to extract from - * @len: the length of the buffer - * @dstp: the cpumask to set. - * - * Returns -errno, or 0 for success. - */ -static inline int cpumask_parse_user(const char __user *buf, int len, - struct cpumask *dstp) -{ - return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits); -} - -/** - * cpulist_scnprintf - print a cpumask into a string as comma-separated list - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. 
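[Aside: the cpumask.h hunk above reinstates the macro forms of cpumask_scnprintf()/cpulist_scnprintf() that take the mask by value and add the "&" themselves, which is why every caller earlier in this patch (drivers/base, drivers/pci) flips between passing "mask" and "&mask". At a call site, as a sketch:

char buf[128];
cpumask_t mask = CPU_MASK_NONE;

/* macro form restored here: pass the mask object itself */
cpumask_scnprintf(buf, sizeof(buf), mask);

/* newer inline-function form takes a pointer instead: */
/* cpumask_scnprintf(buf, sizeof(buf), &mask); */
]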
Otherwise returns the length of the - * (nul-terminated) @buf string. - */ -static inline int cpulist_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits); -} - -/** - * cpulist_parse_user - extract a cpumask from a user string of ranges - * @buf: the buffer to extract from - * @len: the length of the buffer - * @dstp: the cpumask to set. - * - * Returns -errno, or 0 for success. - */ -static inline int cpulist_parse(const char *buf, struct cpumask *dstp) -{ - return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits); -} - /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap diff --git a/trunk/include/linux/interrupt.h b/trunk/include/linux/interrupt.h index 990355fbc54e..8cc8ef47f5b6 100644 --- a/trunk/include/linux/interrupt.h +++ b/trunk/include/linux/interrupt.h @@ -111,13 +111,13 @@ extern void enable_irq(unsigned int irq); extern cpumask_t irq_default_affinity; -extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); +extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); #else /* CONFIG_SMP */ -static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) +static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask) { return -EINVAL; } diff --git a/trunk/include/linux/irq.h b/trunk/include/linux/irq.h index f899b502f186..d64a6d49bdef 100644 --- a/trunk/include/linux/irq.h +++ b/trunk/include/linux/irq.h @@ -113,8 +113,7 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, - const struct cpumask *dest); + void (*set_affinity)(unsigned int irq, cpumask_t dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); diff --git a/trunk/include/linux/kvm.h b/trunk/include/linux/kvm.h index 35525ac63337..f18b86fa8655 100644 --- a/trunk/include/linux/kvm.h +++ b/trunk/include/linux/kvm.h @@ -83,7 +83,6 @@ struct kvm_irqchip { #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 #define KVM_EXIT_DCR 15 -#define KVM_EXIT_NMI 16 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -388,14 +387,6 @@ struct kvm_trace_rec { #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 -#if defined(CONFIG_X86) -#define KVM_CAP_DEVICE_MSI 20 -#endif -/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ -#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#if defined(CONFIG_X86) -#define KVM_CAP_USER_NMI 22 -#endif /* * ioctls for VM fds @@ -467,8 +458,6 @@ struct kvm_trace_rec { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) -/* Available with KVM_CAP_NMI */ -#define KVM_NMI _IO(KVMIO, 0x9a) #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) @@ -511,17 +500,10 @@ struct kvm_assigned_irq { __u32 guest_irq; __u32 flags; union { - struct { - __u32 addr_lo; - __u32 addr_hi; - __u32 data; - } guest_msi; __u32 reserved[12]; }; }; #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) - #endif diff --git a/trunk/include/linux/kvm_host.h b/trunk/include/linux/kvm_host.h index eafabd5c66b2..bb92be2153bc 100644 --- 
a/trunk/include/linux/kvm_host.h +++ b/trunk/include/linux/kvm_host.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -307,14 +306,8 @@ struct kvm_assigned_dev_kernel { int host_busnr; int host_devfn; int host_irq; - bool host_irq_disabled; int guest_irq; - struct msi_msg guest_msi; -#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) -#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) -#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) -#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) - unsigned long irq_requested_type; + int irq_requested; int irq_source_id; struct pci_dev *dev; struct kvm *kvm; @@ -323,7 +316,8 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 158d53d07765..8395e715809d 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -250,7 +250,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(void); extern void task_rq_unlock_wait(struct task_struct *p); -extern cpumask_var_t nohz_cpu_mask; +extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); #else @@ -758,51 +758,20 @@ enum cpu_idle_type { #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ -enum powersavings_balance_level { - POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ - POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package - * first for long running threads - */ - POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle - * cpu package for power savings - */ - MAX_POWERSAVINGS_BALANCE_LEVELS -}; +#define BALANCE_FOR_MC_POWER \ + (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) -extern int sched_mc_power_savings, sched_smt_power_savings; +#define BALANCE_FOR_PKG_POWER \ + ((sched_mc_power_savings || sched_smt_power_savings) ? \ + SD_POWERSAVINGS_BALANCE : 0) -static inline int sd_balance_for_mc_power(void) -{ - if (sched_smt_power_savings) - return SD_POWERSAVINGS_BALANCE; +#define test_sd_parent(sd, flag) ((sd->parent && \ + (sd->parent->flags & flag)) ? 1 : 0) - return 0; -} - -static inline int sd_balance_for_package_power(void) -{ - if (sched_mc_power_savings | sched_smt_power_savings) - return SD_POWERSAVINGS_BALANCE; - - return 0; -} - -/* - * Optimise SD flags for power savings: - * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings. 
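
The sched.h hunk above also swaps nohz_cpu_mask from cpumask_var_t back to a plain cpumask_t. As a rough sketch of the difference between the two storage disciplines; toy types, not the kernel's implementation, which additionally depends on CONFIG_CPUMASK_OFFSTACK:

#include <stdlib.h>

typedef struct { unsigned long bits[64]; } toy_cpumask_t;
typedef toy_cpumask_t *toy_cpumask_var_t;	/* "offstack" flavour */

static int toy_alloc_cpumask_var(toy_cpumask_var_t *p)
{
	*p = calloc(1, sizeof(**p));		/* may fail at runtime */
	return *p != NULL;
}

static void toy_free_cpumask_var(toy_cpumask_var_t p)
{
	free(p);
}

int main(void)
{
	toy_cpumask_t on_stack = { { 0 } };	/* no failure path, big frame */
	toy_cpumask_var_t dynamic;

	if (!toy_alloc_cpumask_var(&dynamic))
		return 1;
	dynamic->bits[0] = on_stack.bits[0];
	toy_free_cpumask_var(dynamic);
	return 0;
}

The plain cpumask_t form removes every alloc-failure branch, at the cost of NR_CPUS/8 bytes wherever one is declared.
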
- * Keep default SD flags if sched_{smt,mc}_power_saving=0 - */ - -static inline int sd_power_saving_flags(void) -{ - if (sched_mc_power_savings | sched_smt_power_savings) - return SD_BALANCE_NEWIDLE; - - return 0; -} struct sched_group { struct sched_group *next; /* Must be a circular list */ + cpumask_t cpumask; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -815,15 +784,8 @@ struct sched_group { * (see include/linux/reciprocal_div.h) */ u32 reciprocal_cpu_power; - - unsigned long cpumask[]; }; -static inline struct cpumask *sched_group_cpus(struct sched_group *sg) -{ - return to_cpumask(sg->cpumask); -} - enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, @@ -847,6 +809,7 @@ struct sched_domain { struct sched_domain *parent; /* top domain must be null terminated */ struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ + cpumask_t span; /* span of all CPUs in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ @@ -901,35 +864,18 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif - - /* span of all CPUs in this domain */ - unsigned long span[]; }; -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); -/* Test a flag in parent sched domain */ -static inline int test_sd_parent(struct sched_domain *sd, int flag) -{ - if (sd->parent && (sd->parent->flags & flag)) - return 1; - - return 0; -} - #else /* CONFIG_SMP */ struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { } @@ -980,7 +926,7 @@ struct sched_class { void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, - const struct cpumask *newmask); + const cpumask_t *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); @@ -1633,12 +1579,12 @@ extern cputime_t task_gtime(struct task_struct *p); #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); + const cpumask_t *new_mask); #else static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) + const cpumask_t *new_mask) { - if (!cpumask_test_cpu(0, new_mask)) + if (!cpu_isset(0, *new_mask)) return -EINVAL; return 0; } @@ -2249,8 +2195,10 @@ __trace_special(void *__tr, void *__data, } #endif -extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); -extern long sched_getaffinity(pid_t pid, struct cpumask *mask); +extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); +extern long sched_getaffinity(pid_t pid, cpumask_t *mask); + +extern int sched_mc_power_savings, sched_smt_power_savings; extern void normalize_rt_tasks(void); diff --git a/trunk/include/linux/topology.h b/trunk/include/linux/topology.h index e632d29f0544..0c5b5ac36d8e 100644 --- a/trunk/include/linux/topology.h +++ b/trunk/include/linux/topology.h @@ -125,8 +125,7 @@ int 
arch_update_cpu_topology(void); | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ - | sd_balance_for_mc_power()\ - | sd_power_saving_flags(),\ + | BALANCE_FOR_MC_POWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ } @@ -151,8 +150,7 @@ int arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ - | sd_balance_for_package_power()\ - | sd_power_saving_flags(),\ + | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index f6281711166d..13627191a60d 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -924,15 +924,6 @@ config KMOD endif # MODULES -config INIT_ALL_POSSIBLE - bool - help - Back when each arch used to define their own cpu_online_map and - cpu_possible_map, some of them chose to initialize cpu_possible_map - with all 1s, and others with all 0s. When they were centralised, - it was better to provide this option than to break all the archs - and have several arch maintainers persuing me down dark alleys. - config STOP_MACHINE bool default y diff --git a/trunk/kernel/cpu.c b/trunk/kernel/cpu.c index bae131a1211b..8ea32e8d68b0 100644 --- a/trunk/kernel/cpu.c +++ b/trunk/kernel/cpu.c @@ -24,20 +24,19 @@ cpumask_t cpu_present_map __read_mostly; EXPORT_SYMBOL(cpu_present_map); +#ifndef CONFIG_SMP + /* * Represents all cpu's that are currently online. */ -cpumask_t cpu_online_map __read_mostly; +cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; EXPORT_SYMBOL(cpu_online_map); -#ifdef CONFIG_INIT_ALL_POSSIBLE cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; -#else -cpumask_t cpu_possible_map __read_mostly; -#endif EXPORT_SYMBOL(cpu_possible_map); -#ifdef CONFIG_SMP +#else /* CONFIG_SMP */ + /* Serializes the updates to cpu_online_map, cpu_present_map */ static DEFINE_MUTEX(cpu_add_remove_lock); diff --git a/trunk/kernel/cpuset.c b/trunk/kernel/cpuset.c index 39c1a4c1c5a9..96c0ba13b8cd 100644 --- a/trunk/kernel/cpuset.c +++ b/trunk/kernel/cpuset.c @@ -896,7 +896,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf) if (!*buf) { cpus_clear(trialcs.cpus_allowed); } else { - retval = cpulist_parse(buf, &trialcs.cpus_allowed); + retval = cpulist_parse(buf, trialcs.cpus_allowed); if (retval < 0) return retval; @@ -1482,7 +1482,7 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) mask = cs->cpus_allowed; mutex_unlock(&callback_mutex); - return cpulist_scnprintf(page, PAGE_SIZE, &mask); + return cpulist_scnprintf(page, PAGE_SIZE, mask); } static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) diff --git a/trunk/kernel/irq/chip.c b/trunk/kernel/irq/chip.c index f63c706d25e1..6eb3c7952b64 100644 --- a/trunk/kernel/irq/chip.c +++ b/trunk/kernel/irq/chip.c @@ -46,7 +46,7 @@ void dynamic_irq_init(unsigned int irq) desc->irq_count = 0; desc->irqs_unhandled = 0; #ifdef CONFIG_SMP - cpumask_setall(&desc->affinity); + cpus_setall(desc->affinity); #endif spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/trunk/kernel/irq/manage.c b/trunk/kernel/irq/manage.c index 61c4a9b62165..540f6c49f3fa 100644 --- a/trunk/kernel/irq/manage.c +++ b/trunk/kernel/irq/manage.c @@ -79,7 +79,7 @@ int irq_can_set_affinity(unsigned int irq) * @cpumask: cpumask * */ -int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) +int irq_set_affinity(unsigned int irq, cpumask_t cpumask) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -91,14 +91,14 @@ int irq_set_affinity(unsigned int irq, const struct 
cpumask *cpumask) #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - cpumask_copy(&desc->affinity, cpumask); + desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); } else { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, cpumask); + desc->pending_mask = cpumask; } #else - cpumask_copy(&desc->affinity, cpumask); + desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); #endif desc->status |= IRQ_AFFINITY_SET; @@ -112,24 +112,26 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) */ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) { + cpumask_t mask; + if (!irq_can_set_affinity(irq)) return 0; + cpus_and(mask, cpu_online_map, irq_default_affinity); + /* * Preserve an userspace affinity setup, but make sure that * one of the targets is online. */ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { - if (cpumask_any_and(&desc->affinity, cpu_online_mask) - < nr_cpu_ids) - goto set_affinity; + if (cpus_intersects(desc->affinity, cpu_online_map)) + mask = desc->affinity; else desc->status &= ~IRQ_AFFINITY_SET; } - cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity); -set_affinity: - desc->chip->set_affinity(irq, &desc->affinity); + desc->affinity = mask; + desc->chip->set_affinity(irq, mask); return 0; } diff --git a/trunk/kernel/irq/migration.c b/trunk/kernel/irq/migration.c index bd72329e630c..9db681d95814 100644 --- a/trunk/kernel/irq/migration.c +++ b/trunk/kernel/irq/migration.c @@ -4,6 +4,7 @@ void move_masked_irq(int irq) { struct irq_desc *desc = irq_to_desc(irq); + cpumask_t tmp; if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; @@ -18,7 +19,7 @@ void move_masked_irq(int irq) desc->status &= ~IRQ_MOVE_PENDING; - if (unlikely(cpumask_empty(&desc->pending_mask))) + if (unlikely(cpus_empty(desc->pending_mask))) return; if (!desc->chip->set_affinity) @@ -26,6 +27,8 @@ void move_masked_irq(int irq) assert_spin_locked(&desc->lock); + cpus_and(tmp, desc->pending_mask, cpu_online_map); + /* * If there was a valid mask to work with, please * do the disable, re-program, enable sequence. @@ -38,13 +41,10 @@ void move_masked_irq(int irq) * For correct operation this depends on the caller * masking the irqs. 
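
A condensed restatement of the do_irq_select_affinity() logic restored above, shrunk to one-word masks so it runs standalone; the values are made up:

#include <stdio.h>

int main(void)
{
	unsigned long online = 0x0fUL;	/* CPUs 0-3 online */
	unsigned long deflt  = 0xffUL;	/* irq_default_affinity */
	unsigned long user   = 0x30UL;	/* user bound the irq to CPUs 4-5 */
	unsigned long mask   = online & deflt;	/* cpus_and() */

	/* preserve a user setting only while a target is still online */
	if (user & online)
		mask = user;
	printf("effective affinity %#lx\n", mask);	/* 0xf: user mask dropped */
	return 0;
}
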
*/ - if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask) - < nr_cpu_ids)) { - cpumask_and(&desc->affinity, - &desc->pending_mask, cpu_online_mask); - desc->chip->set_affinity(irq, &desc->affinity); + if (likely(!cpus_empty(tmp))) { + desc->chip->set_affinity(irq,tmp); } - cpumask_clear(&desc->pending_mask); + cpus_clear(desc->pending_mask); } void move_native_irq(int irq) diff --git a/trunk/kernel/irq/proc.c b/trunk/kernel/irq/proc.c index d2c0e5ee53c5..f6b3440f05bc 100644 --- a/trunk/kernel/irq/proc.c +++ b/trunk/kernel/irq/proc.c @@ -40,42 +40,33 @@ static ssize_t irq_affinity_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; - cpumask_var_t new_value; + cpumask_t new_value; int err; if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || irq_balancing_disabled(irq)) return -EIO; - if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) - return -ENOMEM; - err = cpumask_parse_user(buffer, count, new_value); if (err) - goto free_cpumask; + return err; - if (!is_affinity_mask_valid(*new_value)) { - err = -EINVAL; - goto free_cpumask; - } + if (!is_affinity_mask_valid(new_value)) + return -EINVAL; /* * Do not allow disabling IRQs completely - it's a too easy * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ - if (!cpumask_intersects(new_value, cpu_online_mask)) { + if (!cpus_intersects(new_value, cpu_online_map)) /* Special case for empty set - allow the architecture code to set default SMP affinity. */ - err = irq_select_affinity_usr(irq) ? -EINVAL : count; - } else { - irq_set_affinity(irq, new_value); - err = count; - } + return irq_select_affinity_usr(irq) ? -EINVAL : count; + + irq_set_affinity(irq, new_value); -free_cpumask: - free_cpumask_var(new_value); - return err; + return count; } static int irq_affinity_proc_open(struct inode *inode, struct file *file) @@ -104,7 +95,7 @@ static ssize_t default_affinity_write(struct file *file, cpumask_t new_value; int err; - err = cpumask_parse_user(buffer, count, &new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/trunk/kernel/profile.c b/trunk/kernel/profile.c index 4cb7d68fed82..60adefb59b5e 100644 --- a/trunk/kernel/profile.c +++ b/trunk/kernel/profile.c @@ -442,7 +442,7 @@ void profile_tick(int type) static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_scnprintf(page, count, (cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -456,7 +456,7 @@ static int prof_cpu_mask_write_proc(struct file *file, unsigned long full_count = count, err; cpumask_t new_value; - err = cpumask_parse_user(buffer, count, &new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/trunk/kernel/rcuclassic.c b/trunk/kernel/rcuclassic.c index c03ca3e61919..e503a002f330 100644 --- a/trunk/kernel/rcuclassic.c +++ b/trunk/kernel/rcuclassic.c @@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) * unnecessarily. 
*/ smp_mb(); - cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask); + cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); rcp->signaled = 0; } diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 27ba1d642f0f..fff1c4a20b65 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -498,26 +498,18 @@ struct rt_rq { */ struct root_domain { atomic_t refcount; - cpumask_var_t span; - cpumask_var_t online; + cpumask_t span; + cpumask_t online; /* * The "RT overload" flag: it gets set if a CPU has more than * one runnable RT task. */ - cpumask_var_t rto_mask; + cpumask_t rto_mask; atomic_t rto_count; #ifdef CONFIG_SMP struct cpupri cpupri; #endif -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - /* - * Preferred wake up cpu nominated by sched_mc balance that will be - * used when most cpus are idle in the system indicating overall very - * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) - */ - unsigned int sched_mc_preferred_wakeup_cpu; -#endif }; /* @@ -1522,7 +1514,7 @@ static int tg_shares_up(struct task_group *tg, void *data) struct sched_domain *sd = data; int i; - for_each_cpu(i, sched_domain_span(sd)) { + for_each_cpu_mask(i, sd->span) { /* * If there are currently no tasks on the cpu pretend there * is one of average load so that when a new task gets to @@ -1543,7 +1535,7 @@ static int tg_shares_up(struct task_group *tg, void *data) if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) shares = tg->shares; - for_each_cpu(i, sched_domain_span(sd)) + for_each_cpu_mask(i, sd->span) update_group_shares_cpu(tg, i, shares, rq_weight); return 0; @@ -2109,17 +2101,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) int i; /* Skip over this group if it has no CPUs allowed */ - if (!cpumask_intersects(sched_group_cpus(group), - &p->cpus_allowed)) + if (!cpus_intersects(group->cpumask, p->cpus_allowed)) continue; - local_group = cpumask_test_cpu(this_cpu, - sched_group_cpus(group)); + local_group = cpu_isset(this_cpu, group->cpumask); /* Tally up the load of all CPUs in the group */ avg_load = 0; - for_each_cpu(i, sched_group_cpus(group)) { + for_each_cpu_mask_nr(i, group->cpumask) { /* Bias balancing toward cpus of our domain */ if (local_group) load = source_load(i, load_idx); @@ -2151,14 +2141,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) * find_idlest_cpu - find the idlest cpu among the cpus in group. 
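
The rcu_start_batch() hunk above goes back to cpus_andnot() to exclude tick-stopped CPUs from the grace-period mask. The operation itself, as a one-word standalone sketch with invented values:

#include <stdio.h>

int main(void)
{
	unsigned long online = 0xffUL;	/* CPUs 0-7 online */
	unsigned long nohz   = 0xc0UL;	/* CPUs 6-7 stopped their tick */
	unsigned long wait   = online & ~nohz;	/* cpus_andnot() */

	printf("grace period waits on %#lx\n", wait);	/* 0x3f */
	return 0;
}
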
*/ static int -find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) +find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, + cpumask_t *tmp) { unsigned long load, min_load = ULONG_MAX; int idlest = -1; int i; /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { + cpus_and(*tmp, group->cpumask, p->cpus_allowed); + + for_each_cpu_mask_nr(i, *tmp) { load = weighted_cpuload(i); if (load < min_load || (load == min_load && i == this_cpu)) { @@ -2200,6 +2193,7 @@ static int sched_balance_self(int cpu, int flag) update_shares(sd); while (sd) { + cpumask_t span, tmpmask; struct sched_group *group; int new_cpu, weight; @@ -2208,13 +2202,14 @@ static int sched_balance_self(int cpu, int flag) continue; } + span = sd->span; group = find_idlest_group(sd, t, cpu); if (!group) { sd = sd->child; continue; } - new_cpu = find_idlest_cpu(group, t, cpu); + new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); if (new_cpu == -1 || new_cpu == cpu) { /* Now try balancing at a lower domain level of cpu */ sd = sd->child; @@ -2223,10 +2218,10 @@ static int sched_balance_self(int cpu, int flag) /* Now try balancing at a lower domain level of new_cpu */ cpu = new_cpu; - weight = cpumask_weight(sched_domain_span(sd)); sd = NULL; + weight = cpus_weight(span); for_each_domain(cpu, tmp) { - if (weight <= cpumask_weight(sched_domain_span(tmp))) + if (weight <= cpus_weight(tmp->span)) break; if (tmp->flags & flag) sd = tmp; @@ -2271,7 +2266,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) cpu = task_cpu(p); for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (cpu_isset(cpu, sd->span)) { update_shares(sd); break; } @@ -2320,7 +2315,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) else { struct sched_domain *sd; for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (cpu_isset(cpu, sd->span)) { schedstat_inc(sd, ttwu_wake_remote); break; } @@ -2851,7 +2846,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) struct rq *rq; rq = task_rq_lock(p, &flags); - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) + if (!cpu_isset(dest_cpu, p->cpus_allowed) || unlikely(!cpu_active(dest_cpu))) goto out; @@ -2916,7 +2911,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. 
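
find_idlest_cpu() above regrows a caller-supplied scratch mask ('cpumask_t *tmp') rather than iterating an and-ed view in place. A sketch of that scratch-mask convention, assuming 64-bit longs and with invented toy_ helpers:

typedef struct { unsigned long bits[8]; } toy_cpumask_t;

static void toy_and(toy_cpumask_t *dst, const toy_cpumask_t *a,
		    const toy_cpumask_t *b)
{
	for (unsigned int i = 0; i < 8; i++)
		dst->bits[i] = a->bits[i] & b->bits[i];
}

static int toy_find_idlest_cpu(const toy_cpumask_t *group,
			       const toy_cpumask_t *allowed,
			       toy_cpumask_t *tmp)	/* caller's scratch */
{
	toy_and(tmp, group, allowed);	/* traverse only the allowed CPUs */
	for (unsigned int i = 0; i < 8 * 64; i++)
		if (tmp->bits[i / 64] & (1UL << (i % 64)))
			return (int)i;	/* stand-in for the real load scan */
	return -1;
}

int main(void)
{
	toy_cpumask_t group = { { 0x6 } }, allowed = { { 0x4 } }, tmp;

	return toy_find_idlest_cpu(&group, &allowed, &tmp) == 2 ? 0 : 1;
}

Passing the temporary down keeps each frame small while every callee reuses the same storage.
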
*/ - if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { + if (!cpu_isset(this_cpu, p->cpus_allowed)) { schedstat_inc(p, se.nr_failed_migrations_affine); return 0; } @@ -3091,7 +3086,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long *imbalance, enum cpu_idle_type idle, - int *sd_idle, const struct cpumask *cpus, int *balance) + int *sd_idle, const cpumask_t *cpus, int *balance) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -3127,11 +3122,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long sum_avg_load_per_task; unsigned long avg_load_per_task; - local_group = cpumask_test_cpu(this_cpu, - sched_group_cpus(group)); + local_group = cpu_isset(this_cpu, group->cpumask); if (local_group) - balance_cpu = cpumask_first(sched_group_cpus(group)); + balance_cpu = first_cpu(group->cpumask); /* Tally up the load of all CPUs in the group */ sum_weighted_load = sum_nr_running = avg_load = 0; @@ -3140,8 +3134,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, max_cpu_load = 0; min_cpu_load = ~0UL; - for_each_cpu_and(i, sched_group_cpus(group), cpus) { - struct rq *rq = cpu_rq(i); + for_each_cpu_mask_nr(i, group->cpumask) { + struct rq *rq; + + if (!cpu_isset(i, *cpus)) + continue; + + rq = cpu_rq(i); if (*sd_idle && rq->nr_running) *sd_idle = 0; @@ -3252,8 +3251,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ if ((sum_nr_running < min_nr_running) || (sum_nr_running == min_nr_running && - cpumask_first(sched_group_cpus(group)) > - cpumask_first(sched_group_cpus(group_min)))) { + first_cpu(group->cpumask) < + first_cpu(group_min->cpumask))) { group_min = group; min_nr_running = sum_nr_running; min_load_per_task = sum_weighted_load / @@ -3268,8 +3267,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (sum_nr_running <= group_capacity - 1) { if (sum_nr_running > leader_nr_running || (sum_nr_running == leader_nr_running && - cpumask_first(sched_group_cpus(group)) < - cpumask_first(sched_group_cpus(group_leader)))) { + first_cpu(group->cpumask) > + first_cpu(group_leader->cpumask))) { group_leader = group; leader_nr_running = sum_nr_running; } @@ -3395,10 +3394,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (this == group_leader && group_leader != group_min) { *imbalance = min_load_per_task; - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = - cpumask_first(sched_group_cpus(group_leader)); - } return group_min; } #endif @@ -3412,16 +3407,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ static struct rq * find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, - unsigned long imbalance, const struct cpumask *cpus) + unsigned long imbalance, const cpumask_t *cpus) { struct rq *busiest = NULL, *rq; unsigned long max_load = 0; int i; - for_each_cpu(i, sched_group_cpus(group)) { + for_each_cpu_mask_nr(i, group->cpumask) { unsigned long wl; - if (!cpumask_test_cpu(i, cpus)) + if (!cpu_isset(i, *cpus)) continue; rq = cpu_rq(i); @@ -3451,7 +3446,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, - int *balance, struct cpumask *cpus) + int *balance, cpumask_t *cpus) { 
int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; @@ -3459,7 +3454,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct rq *busiest; unsigned long flags; - cpumask_setall(cpus); + cpus_setall(*cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3519,8 +3514,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(all_pinned)) { - cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) + cpu_clear(cpu_of(busiest), *cpus); + if (!cpus_empty(*cpus)) goto redo; goto out_balanced; } @@ -3537,8 +3532,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ - if (!cpumask_test_cpu(this_cpu, - &busiest->curr->cpus_allowed)) { + if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; goto out_one_pinned; @@ -3613,7 +3607,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, */ static int load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, - struct cpumask *cpus) + cpumask_t *cpus) { struct sched_group *group; struct rq *busiest = NULL; @@ -3622,7 +3616,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, int sd_idle = 0; int all_pinned = 0; - cpumask_setall(cpus); + cpus_setall(*cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3666,71 +3660,17 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, double_unlock_balance(this_rq, busiest); if (unlikely(all_pinned)) { - cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) + cpu_clear(cpu_of(busiest), *cpus); + if (!cpus_empty(*cpus)) goto redo; } } if (!ld_moved) { - int active_balance = 0; - schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - - if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) - return -1; - - if (sd->nr_balance_failed++ < 2) - return -1; - - /* - * The only task running in a non-idle cpu can be moved to this - * cpu in an attempt to completely freeup the other CPU - * package. The same method used to move task in load_balance() - * have been extended for load_balance_newidle() to speedup - * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2) - * - * The package power saving logic comes from - * find_busiest_group(). If there are no imbalance, then - * f_b_g() will return NULL. However when sched_mc={1,2} then - * f_b_g() will select a group from which a running task may be - * pulled to this cpu in order to make the other package idle. - * If there is no opportunity to make a package idle and if - * there are no imbalance, then f_b_g() will return NULL and no - * action will be taken in load_balance_newidle(). - * - * Under normal task pull operation due to imbalance, there - * will be more than one task in the source run queue and - * move_tasks() will succeed. ld_moved will be true and this - * active balance code will not be triggered. 
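
The all_pinned handling in load_balance() above reverts to the shrink-and-retry idiom: drop the fully pinned busiest CPU from the candidate mask and redo the search. A one-word sketch (gcc builtins used for brevity; values invented):

#include <stdio.h>

int main(void)
{
	unsigned long cpus = 0x0fUL;	/* candidate CPUs 0-3 */
	int busiest;

	while (cpus) {
		busiest = __builtin_ctzl(cpus);	/* pretend it is busiest */
		/* all its tasks were pinned: drop it and redo the search */
		cpus &= ~(1UL << busiest);
		printf("cpu %d pinned, %d candidates left\n",
		       busiest, __builtin_popcountl(cpus));
	}
	return 0;
}
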
- */ - - /* Lock busiest in correct order while this_rq is held */ - double_lock_balance(this_rq, busiest); - - /* - * don't kick the migration_thread, if the curr - * task on busiest cpu can't be moved to this_cpu - */ - if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { - double_unlock_balance(this_rq, busiest); - all_pinned = 1; - return ld_moved; - } - - if (!busiest->active_balance) { - busiest->active_balance = 1; - busiest->push_cpu = this_cpu; - active_balance = 1; - } - - double_unlock_balance(this_rq, busiest); - if (active_balance) - wake_up_process(busiest->migration_thread); - } else sd->nr_balance_failed = 0; @@ -3756,10 +3696,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; unsigned long next_balance = jiffies + HZ; - cpumask_var_t tmpmask; - - if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC)) - return; + cpumask_t tmpmask; for_each_domain(this_cpu, sd) { unsigned long interval; @@ -3770,7 +3707,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) if (sd->flags & SD_BALANCE_NEWIDLE) /* If we've pulled tasks over stop searching: */ pulled_task = load_balance_newidle(this_cpu, this_rq, - sd, tmpmask); + sd, &tmpmask); interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) @@ -3785,7 +3722,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) */ this_rq->next_balance = next_balance; } - free_cpumask_var(tmpmask); } /* @@ -3823,7 +3759,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) /* Search for an sd spanning us and the target CPU. */ for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && - cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + cpu_isset(busiest_cpu, sd->span)) break; } @@ -3842,9 +3778,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) #ifdef CONFIG_NO_HZ static struct { atomic_t load_balancer; - cpumask_var_t cpu_mask; + cpumask_t cpu_mask; } nohz ____cacheline_aligned = { .load_balancer = ATOMIC_INIT(-1), + .cpu_mask = CPU_MASK_NONE, }; /* @@ -3872,7 +3809,7 @@ int select_nohz_load_balancer(int stop_tick) int cpu = smp_processor_id(); if (stop_tick) { - cpumask_set_cpu(cpu, nohz.cpu_mask); + cpu_set(cpu, nohz.cpu_mask); cpu_rq(cpu)->in_nohz_recently = 1; /* @@ -3886,7 +3823,7 @@ int select_nohz_load_balancer(int stop_tick) } /* time for ilb owner also to sleep */ - if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { + if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) { if (atomic_read(&nohz.load_balancer) == cpu) atomic_set(&nohz.load_balancer, -1); return 0; @@ -3899,10 +3836,10 @@ int select_nohz_load_balancer(int stop_tick) } else if (atomic_read(&nohz.load_balancer) == cpu) return 1; } else { - if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) + if (!cpu_isset(cpu, nohz.cpu_mask)) return 0; - cpumask_clear_cpu(cpu, nohz.cpu_mask); + cpu_clear(cpu, nohz.cpu_mask); if (atomic_read(&nohz.load_balancer) == cpu) if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) @@ -3930,11 +3867,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) unsigned long next_balance = jiffies + 60*HZ; int update_next_balance = 0; int need_serialize; - cpumask_var_t tmp; - - /* Fails alloc? Rebalancing probably not a priority right now. 
*/ - if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) - return; + cpumask_t tmp; for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -3959,7 +3892,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) } if (time_after_eq(jiffies, sd->last_balance + interval)) { - if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { + if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { /* * We've pulled tasks over so either we're no * longer idle, or one of our SMT siblings is @@ -3993,8 +3926,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) */ if (likely(update_next_balance)) rq->next_balance = next_balance; - - free_cpumask_var(tmp); } /* @@ -4019,13 +3950,12 @@ static void run_rebalance_domains(struct softirq_action *h) */ if (this_rq->idle_at_tick && atomic_read(&nohz.load_balancer) == this_cpu) { + cpumask_t cpus = nohz.cpu_mask; struct rq *rq; int balance_cpu; - for_each_cpu(balance_cpu, nohz.cpu_mask) { - if (balance_cpu == this_cpu) - continue; - + cpu_clear(this_cpu, cpus); + for_each_cpu_mask_nr(balance_cpu, cpus) { /* * If this cpu gets work to do, stop the load balancing * work being done for other cpus. Next load @@ -4063,7 +3993,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) rq->in_nohz_recently = 0; if (atomic_read(&nohz.load_balancer) == cpu) { - cpumask_clear_cpu(cpu, nohz.cpu_mask); + cpu_clear(cpu, nohz.cpu_mask); atomic_set(&nohz.load_balancer, -1); } @@ -4076,7 +4006,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * TBD: Traverse the sched domains and nominate * the nearest cpu in the nohz.cpu_mask. */ - int ilb = cpumask_first(nohz.cpu_mask); + int ilb = first_cpu(nohz.cpu_mask); if (ilb < nr_cpu_ids) resched_cpu(ilb); @@ -4088,7 +4018,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * cpus with ticks stopped, is it time for that to stop? 
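
run_rebalance_domains() above returns to copying nohz.cpu_mask and clearing this_cpu in the copy, instead of skipping this_cpu inside the loop. The idiom in miniature, with a one-word stand-in mask:

#include <stdio.h>

int main(void)
{
	unsigned long nohz_mask = 0x1bUL;	/* CPUs 0,1,3,4 tick-stopped */
	int this_cpu = 3;
	unsigned long cpus = nohz_mask & ~(1UL << this_cpu);	/* copy, then clear */
	int cpu;

	for (cpu = 0; cpu < 64; cpu++)
		if (cpus & (1UL << cpu))
			printf("balance on behalf of cpu %d\n", cpu);
	return 0;
}
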
*/ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && - cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { + cpus_weight(nohz.cpu_mask) == num_online_cpus()) { resched_cpu(cpu); return; } @@ -4098,7 +4028,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * someone else, then no need raise the SCHED_SOFTIRQ */ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && - cpumask_test_cpu(cpu, nohz.cpu_mask)) + cpu_isset(cpu, nohz.cpu_mask)) return; #endif if (time_after_eq(jiffies, rq->next_balance)) @@ -5471,9 +5401,10 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) return retval; } -long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) { - cpumask_var_t cpus_allowed, new_mask; + cpumask_t cpus_allowed; + cpumask_t new_mask = *in_mask; struct task_struct *p; int retval; @@ -5495,14 +5426,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); - if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { - retval = -ENOMEM; - goto out_put_task; - } - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { - retval = -ENOMEM; - goto out_free_cpus_allowed; - } retval = -EPERM; if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; @@ -5511,41 +5434,37 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_unlock; - cpuset_cpus_allowed(p, cpus_allowed); - cpumask_and(new_mask, in_mask, cpus_allowed); + cpuset_cpus_allowed(p, &cpus_allowed); + cpus_and(new_mask, new_mask, cpus_allowed); again: - retval = set_cpus_allowed_ptr(p, new_mask); + retval = set_cpus_allowed_ptr(p, &new_mask); if (!retval) { - cpuset_cpus_allowed(p, cpus_allowed); - if (!cpumask_subset(new_mask, cpus_allowed)) { + cpuset_cpus_allowed(p, &cpus_allowed); + if (!cpus_subset(new_mask, cpus_allowed)) { /* * We must have raced with a concurrent cpuset * update. Just reset the cpus_allowed to the * cpuset's cpus_allowed */ - cpumask_copy(new_mask, cpus_allowed); + new_mask = cpus_allowed; goto again; } } out_unlock: - free_cpumask_var(new_mask); -out_free_cpus_allowed: - free_cpumask_var(cpus_allowed); -out_put_task: put_task_struct(p); put_online_cpus(); return retval; } static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, - struct cpumask *new_mask) + cpumask_t *new_mask) { - if (len < cpumask_size()) - cpumask_clear(new_mask); - else if (len > cpumask_size()) - len = cpumask_size(); - + if (len < sizeof(cpumask_t)) { + memset(new_mask, 0, sizeof(cpumask_t)); + } else if (len > sizeof(cpumask_t)) { + len = sizeof(cpumask_t); + } return copy_from_user(new_mask, user_mask_ptr, len) ? 
-EFAULT : 0; } @@ -5558,20 +5477,17 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { - cpumask_var_t new_mask; + cpumask_t new_mask; int retval; - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) - return -ENOMEM; + retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); + if (retval) + return retval; - retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); - if (retval == 0) - retval = sched_setaffinity(pid, new_mask); - free_cpumask_var(new_mask); - return retval; + return sched_setaffinity(pid, &new_mask); } -long sched_getaffinity(pid_t pid, struct cpumask *mask) +long sched_getaffinity(pid_t pid, cpumask_t *mask) { struct task_struct *p; int retval; @@ -5588,7 +5504,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) if (retval) goto out_unlock; - cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); + cpus_and(*mask, p->cpus_allowed, cpu_online_map); out_unlock: read_unlock(&tasklist_lock); @@ -5607,24 +5523,19 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { int ret; - cpumask_var_t mask; + cpumask_t mask; - if (len < cpumask_size()) + if (len < sizeof(cpumask_t)) return -EINVAL; - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + ret = sched_getaffinity(pid, &mask); + if (ret < 0) + return ret; - ret = sched_getaffinity(pid, mask); - if (ret == 0) { - if (copy_to_user(user_mask_ptr, mask, cpumask_size())) - ret = -EFAULT; - else - ret = cpumask_size(); - } - free_cpumask_var(mask); + if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) + return -EFAULT; - return ret; + return sizeof(cpumask_t); } /** @@ -5966,7 +5877,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); idle->prio = idle->normal_prio = MAX_PRIO; - cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); + idle->cpus_allowed = cpumask_of_cpu(cpu); __set_task_cpu(idle, cpu); rq->curr = rq->idle = idle; @@ -5993,9 +5904,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * indicates which cpus entered this state. This is used * in the rcu update to wait only for active cpus. For system * which do not switch off the HZ timer nohz_cpu_mask should - * always be CPU_BITS_NONE. + * always be CPU_MASK_NONE. */ -cpumask_var_t nohz_cpu_mask; +cpumask_t nohz_cpu_mask = CPU_MASK_NONE; /* * Increase the granularity value when there are more CPUs, @@ -6050,7 +5961,7 @@ static inline void sched_init_granularity(void) * task must not exit() & deallocate itself prematurely. The * call is not atomic; no spinlocks may be held. 
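
get_user_cpu_mask() above zero-fills a short user buffer and truncates a long one before copying, now keyed off sizeof(cpumask_t) rather than cpumask_size(). A userspace-flavoured sketch with memcpy standing in for copy_from_user (toy types):

#include <stdio.h>
#include <string.h>

typedef struct { unsigned long bits[4]; } toy_cpumask_t;

static int toy_get_user_cpu_mask(const void *user, size_t len,
				 toy_cpumask_t *mask)
{
	if (len < sizeof(*mask))
		memset(mask, 0, sizeof(*mask));	/* short buffer: clear the tail */
	else if (len > sizeof(*mask))
		len = sizeof(*mask);		/* long buffer: ignore the tail */
	memcpy(mask, user, len);
	return 0;
}

int main(void)
{
	unsigned long word = 0x5UL;
	toy_cpumask_t mask;

	toy_get_user_cpu_mask(&word, sizeof(word), &mask);
	printf("%lx %lx\n", mask.bits[0], mask.bits[1]);	/* 5 0 */
	return 0;
}
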
*/ -int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) { struct migration_req req; unsigned long flags; @@ -6058,13 +5969,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) int ret = 0; rq = task_rq_lock(p, &flags); - if (!cpumask_intersects(new_mask, cpu_online_mask)) { + if (!cpus_intersects(*new_mask, cpu_online_map)) { ret = -EINVAL; goto out; } if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && - !cpumask_equal(&p->cpus_allowed, new_mask))) { + !cpus_equal(p->cpus_allowed, *new_mask))) { ret = -EINVAL; goto out; } @@ -6072,15 +5983,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) if (p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + p->cpus_allowed = *new_mask; + p->rt.nr_cpus_allowed = cpus_weight(*new_mask); } /* Can the task run on the task's current CPU? If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), new_mask)) + if (cpu_isset(task_cpu(p), *new_mask)) goto out; - if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { + if (migrate_task(p, any_online_cpu(*new_mask), &req)) { /* Need help from migration thread: drop lock and wait. */ task_rq_unlock(rq, &flags); wake_up_process(rq->migration_thread); @@ -6122,7 +6033,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (task_cpu(p) != src_cpu) goto done; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!cpu_isset(dest_cpu, p->cpus_allowed)) goto fail; on_rq = p->se.on_rq; @@ -6219,43 +6130,50 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) */ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { + unsigned long flags; + cpumask_t mask; + struct rq *rq; int dest_cpu; - /* FIXME: Use cpumask_of_node here. */ - cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu)); - const struct cpumask *nodemask = &_nodemask; - -again: - /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) - goto move; - - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); - if (dest_cpu < nr_cpu_ids) - goto move; - - /* No more Mr. Nice Guy. */ - if (dest_cpu >= nr_cpu_ids) { - cpuset_cpus_allowed_locked(p, &p->cpus_allowed); - dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, dead_cpu); - } - } + do { + /* On same node? */ + mask = node_to_cpumask(cpu_to_node(dead_cpu)); + cpus_and(mask, mask, p->cpus_allowed); + dest_cpu = any_online_cpu(mask); -move: - /* It can have affinity changed while we were choosing. */ - if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) - goto again; + /* On any allowed CPU? */ + if (dest_cpu >= nr_cpu_ids) + dest_cpu = any_online_cpu(p->cpus_allowed); + + /* No more Mr. Nice Guy. 
*/ + if (dest_cpu >= nr_cpu_ids) { + cpumask_t cpus_allowed; + + cpuset_cpus_allowed_locked(p, &cpus_allowed); + /* + * Try to stay on the same cpuset, where the + * current cpuset may be a subset of all cpus. + * The cpuset_cpus_allowed_locked() variant of + * cpuset_cpus_allowed() will not block. It must be + * called within calls to cpuset_lock/cpuset_unlock. + */ + rq = task_rq_lock(p, &flags); + p->cpus_allowed = cpus_allowed; + dest_cpu = any_online_cpu(p->cpus_allowed); + task_rq_unlock(rq, &flags); + + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu%d\n", + task_pid_nr(p), p->comm, dead_cpu); + } + } + } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); } /* @@ -6267,7 +6185,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) */ static void migrate_nr_uninterruptible(struct rq *rq_src) { - struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); + struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); unsigned long flags; local_irq_save(flags); @@ -6557,7 +6475,7 @@ static void set_rq_online(struct rq *rq) if (!rq->online) { const struct sched_class *class; - cpumask_set_cpu(rq->cpu, rq->rd->online); + cpu_set(rq->cpu, rq->rd->online); rq->online = 1; for_each_class(class) { @@ -6577,7 +6495,7 @@ static void set_rq_offline(struct rq *rq) class->rq_offline(rq); } - cpumask_clear_cpu(rq->cpu, rq->rd->online); + cpu_clear(rq->cpu, rq->rd->online); rq->online = 0; } } @@ -6618,7 +6536,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + BUG_ON(!cpu_isset(cpu, rq->rd->span)); set_rq_online(rq); } @@ -6632,7 +6550,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; /* Unbind it from offline cpu so it can run. Fall thru. 
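
The move_task_off_dead_cpu() rework above restores a fallback ladder: prefer the dead CPU's node, then any allowed online CPU, then widen the allowed set via the cpuset. Sketched with one-word masks and made-up values:

#include <stdio.h>

static int first_cpu_of(unsigned long mask)
{
	return mask ? __builtin_ctzl(mask) : 64;	/* 64 == none found */
}

int main(void)
{
	unsigned long online  = 0x0eUL;	/* CPU 0 just went down */
	unsigned long node    = 0x03UL;	/* dead CPU's node holds CPUs 0-1 */
	unsigned long allowed = 0x01UL;	/* task was bound to CPU 0 */
	int dest;

	dest = first_cpu_of(node & allowed & online);	/* same node? */
	if (dest >= 64)
		dest = first_cpu_of(allowed & online);	/* any allowed? */
	if (dest >= 64) {
		allowed = ~0UL;	/* "No more Mr. Nice Guy": widen via cpuset */
		dest = first_cpu_of(allowed & online);
	}
	printf("moved to cpu %d\n", dest);	/* 1 */
	return 0;
}
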
*/ kthread_bind(cpu_rq(cpu)->migration_thread, - cpumask_any(cpu_online_mask)); + any_online_cpu(cpu_online_map)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; @@ -6682,7 +6600,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + BUG_ON(!cpu_isset(cpu, rq->rd->span)); set_rq_offline(rq); } spin_unlock_irqrestore(&rq->lock, flags); @@ -6721,13 +6639,13 @@ early_initcall(migration_init); #ifdef CONFIG_SCHED_DEBUG static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, - struct cpumask *groupmask) + cpumask_t *groupmask) { struct sched_group *group = sd->groups; char str[256]; - cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); - cpumask_clear(groupmask); + cpulist_scnprintf(str, sizeof(str), sd->span); + cpus_clear(*groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@ -6741,11 +6659,11 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_CONT "span %s level %s\n", str, sd->name); - if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (!cpu_isset(cpu, sd->span)) { printk(KERN_ERR "ERROR: domain->span does not contain " "CPU%d\n", cpu); } - if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { + if (!cpu_isset(cpu, group->cpumask)) { printk(KERN_ERR "ERROR: domain->groups does not contain" " CPU%d\n", cpu); } @@ -6765,32 +6683,31 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!cpumask_weight(sched_group_cpus(group))) { + if (!cpus_weight(group->cpumask)) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: empty group\n"); break; } - if (cpumask_intersects(groupmask, sched_group_cpus(group))) { + if (cpus_intersects(*groupmask, group->cpumask)) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: repeated CPUs\n"); break; } - cpumask_or(groupmask, groupmask, sched_group_cpus(group)); + cpus_or(*groupmask, *groupmask, group->cpumask); - cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); + cpulist_scnprintf(str, sizeof(str), group->cpumask); printk(KERN_CONT " %s", str); group = group->next; } while (group != sd->groups); printk(KERN_CONT "\n"); - if (!cpumask_equal(sched_domain_span(sd), groupmask)) + if (!cpus_equal(sd->span, *groupmask)) printk(KERN_ERR "ERROR: groups don't span domain->span\n"); - if (sd->parent && - !cpumask_subset(groupmask, sched_domain_span(sd->parent))) + if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) printk(KERN_ERR "ERROR: parent span is not a superset " "of domain->span\n"); return 0; @@ -6798,7 +6715,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, static void sched_domain_debug(struct sched_domain *sd, int cpu) { - cpumask_var_t groupmask; + cpumask_t *groupmask; int level = 0; if (!sd) { @@ -6808,7 +6725,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); - if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) { + groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!groupmask) { printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); return; } @@ -6821,7 +6739,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!sd) break; } - free_cpumask_var(groupmask); + kfree(groupmask); } #else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) @@ -6829,7 
+6747,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) static int sd_degenerate(struct sched_domain *sd) { - if (cpumask_weight(sched_domain_span(sd)) == 1) + if (cpus_weight(sd->span) == 1) return 1; /* Following flags need at least 2 groups */ @@ -6860,7 +6778,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) if (sd_degenerate(parent)) return 1; - if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) + if (!cpus_equal(sd->span, parent->span)) return 0; /* Does parent contain flags not in child? */ @@ -6884,16 +6802,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } -static void free_rootdomain(struct root_domain *rd) -{ - cpupri_cleanup(&rd->cpupri); - - free_cpumask_var(rd->rto_mask); - free_cpumask_var(rd->online); - free_cpumask_var(rd->span); - kfree(rd); -} - static void rq_attach_root(struct rq *rq, struct root_domain *rd) { unsigned long flags; @@ -6903,63 +6811,38 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) if (rq->rd) { struct root_domain *old_rd = rq->rd; - if (cpumask_test_cpu(rq->cpu, old_rd->online)) + if (cpu_isset(rq->cpu, old_rd->online)) set_rq_offline(rq); - cpumask_clear_cpu(rq->cpu, old_rd->span); + cpu_clear(rq->cpu, old_rd->span); if (atomic_dec_and_test(&old_rd->refcount)) - free_rootdomain(old_rd); + kfree(old_rd); } atomic_inc(&rd->refcount); rq->rd = rd; - cpumask_set_cpu(rq->cpu, rd->span); - if (cpumask_test_cpu(rq->cpu, cpu_online_mask)) + cpu_set(rq->cpu, rd->span); + if (cpu_isset(rq->cpu, cpu_online_map)) set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); } -static int init_rootdomain(struct root_domain *rd, bool bootmem) +static void init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); - if (bootmem) { - alloc_bootmem_cpumask_var(&def_root_domain.span); - alloc_bootmem_cpumask_var(&def_root_domain.online); - alloc_bootmem_cpumask_var(&def_root_domain.rto_mask); - cpupri_init(&rd->cpupri, true); - return 0; - } - - if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) - goto free_rd; - if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) - goto free_span; - if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) - goto free_online; - - if (cpupri_init(&rd->cpupri, false) != 0) - goto free_rto_mask; - return 0; + cpus_clear(rd->span); + cpus_clear(rd->online); -free_rto_mask: - free_cpumask_var(rd->rto_mask); -free_online: - free_cpumask_var(rd->online); -free_span: - free_cpumask_var(rd->span); -free_rd: - kfree(rd); - return -ENOMEM; + cpupri_init(&rd->cpupri); } static void init_defrootdomain(void) { - init_rootdomain(&def_root_domain, true); - + init_rootdomain(&def_root_domain); atomic_set(&def_root_domain.refcount, 1); } @@ -6971,10 +6854,7 @@ static struct root_domain *alloc_rootdomain(void) if (!rd) return NULL; - if (init_rootdomain(rd, false) != 0) { - kfree(rd); - return NULL; - } + init_rootdomain(rd); return rd; } @@ -7016,12 +6896,19 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) } /* cpus with isolated domains */ -static cpumask_var_t cpu_isolated_map; +static cpumask_t cpu_isolated_map = CPU_MASK_NONE; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { - cpulist_parse(str, cpu_isolated_map); + static int __initdata ints[NR_CPUS]; + int i; + + str = get_options(str, ARRAY_SIZE(ints), ints); + cpus_clear(cpu_isolated_map); + for (i = 1; i <= ints[0]; i++) + if (ints[i] < NR_CPUS) + cpu_set(ints[i], 
cpu_isolated_map); return 1; } @@ -7030,43 +6917,42 @@ __setup("isolcpus=", isolated_cpu_setup); /* * init_sched_build_groups takes the cpumask we wish to span, and a pointer * to a function which identifies what group(along with sched group) a CPU - * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids - * (due to the fact that we keep track of groups covered with a struct cpumask). + * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS + * (due to the fact that we keep track of groups covered with a cpumask_t). * * init_sched_build_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ static void -init_sched_build_groups(const struct cpumask *span, - const struct cpumask *cpu_map, - int (*group_fn)(int cpu, const struct cpumask *cpu_map, +init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, + int (*group_fn)(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, - struct cpumask *tmpmask), - struct cpumask *covered, struct cpumask *tmpmask) + cpumask_t *tmpmask), + cpumask_t *covered, cpumask_t *tmpmask) { struct sched_group *first = NULL, *last = NULL; int i; - cpumask_clear(covered); + cpus_clear(*covered); - for_each_cpu(i, span) { + for_each_cpu_mask_nr(i, *span) { struct sched_group *sg; int group = group_fn(i, cpu_map, &sg, tmpmask); int j; - if (cpumask_test_cpu(i, covered)) + if (cpu_isset(i, *covered)) continue; - cpumask_clear(sched_group_cpus(sg)); + cpus_clear(sg->cpumask); sg->__cpu_power = 0; - for_each_cpu(j, span) { + for_each_cpu_mask_nr(j, *span) { if (group_fn(j, cpu_map, NULL, tmpmask) != group) continue; - cpumask_set_cpu(j, covered); - cpumask_set_cpu(j, sched_group_cpus(sg)); + cpu_set(j, *covered); + cpu_set(j, sg->cpumask); } if (!first) first = sg; @@ -7130,10 +7016,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) * should be one that prevents unnecessary balancing, but also spreads tasks * out optimally. */ -static void sched_domain_node_span(int node, struct cpumask *span) +static void sched_domain_node_span(int node, cpumask_t *span) { nodemask_t used_nodes; - /* FIXME: use cpumask_of_node() */ node_to_cpumask_ptr(nodemask, node); int i; @@ -7154,34 +7039,19 @@ static void sched_domain_node_span(int node, struct cpumask *span) int sched_smt_power_savings = 0, sched_mc_power_savings = 0; -/* - * The cpus mask in sched_group and sched_domain hangs off the end. - * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space - * for nr_cpu_ids < CONFIG_NR_CPUS. 
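
isolated_cpu_setup() above drops cpulist_parse() (range syntax such as "1-3,5") in favour of get_options(), i.e. a plain comma list of CPU numbers. A minimal standalone reading of the comma-list form, with strtol in place of get_options():

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *arg = "1,2,5";	/* as in isolcpus=1,2,5 */
	unsigned long isolated = 0;
	char *end;

	for (;;) {
		long cpu = strtol(arg, &end, 10);

		if (end == arg)
			break;
		if (cpu >= 0 && cpu < 64)
			isolated |= 1UL << cpu;
		if (*end != ',')
			break;
		arg = end + 1;
	}
	printf("isolated mask: %#lx\n", isolated);	/* 0x26 */
	return 0;
}
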
- */ -struct static_sched_group { - struct sched_group sg; - DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); -}; - -struct static_sched_domain { - struct sched_domain sd; - DECLARE_BITMAP(span, CONFIG_NR_CPUS); -}; - /* * SMT sched-domains: */ #ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); static int -cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *unused) { if (sg) - *sg = &per_cpu(sched_group_cpus, cpu).sg; + *sg = &per_cpu(sched_group_cpus, cpu); return cpu; } #endif /* CONFIG_SCHED_SMT */ @@ -7190,55 +7060,56 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, * multi-core sched-domains: */ #ifdef CONFIG_SCHED_MC -static DEFINE_PER_CPU(struct static_sched_domain, core_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); +static DEFINE_PER_CPU(struct sched_domain, core_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_core); #endif /* CONFIG_SCHED_MC */ #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) static int -cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *mask) { int group; - cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); - group = cpumask_first(mask); + *mask = per_cpu(cpu_sibling_map, cpu); + cpus_and(*mask, *mask, *cpu_map); + group = first_cpu(*mask); if (sg) - *sg = &per_cpu(sched_group_core, group).sg; + *sg = &per_cpu(sched_group_core, group); return group; } #elif defined(CONFIG_SCHED_MC) static int -cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *unused) { if (sg) - *sg = &per_cpu(sched_group_core, cpu).sg; + *sg = &per_cpu(sched_group_core, cpu); return cpu; } #endif -static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_phys); static int -cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *mask) { int group; #ifdef CONFIG_SCHED_MC - /* FIXME: Use cpu_coregroup_mask. 
*/ *mask = cpu_coregroup_map(cpu); cpus_and(*mask, *mask, *cpu_map); - group = cpumask_first(mask); + group = first_cpu(*mask); #elif defined(CONFIG_SCHED_SMT) - cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); - group = cpumask_first(mask); + *mask = per_cpu(cpu_sibling_map, cpu); + cpus_and(*mask, *mask, *cpu_map); + group = first_cpu(*mask); #else group = cpu; #endif if (sg) - *sg = &per_cpu(sched_group_phys, group).sg; + *sg = &per_cpu(sched_group_phys, group); return group; } @@ -7252,21 +7123,19 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains); static struct sched_group ***sched_group_nodes_bycpu; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); +static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); -static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *nodemask) +static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg, cpumask_t *nodemask) { int group; - /* FIXME: use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu)); - cpumask_and(nodemask, pnodemask, cpu_map); - group = cpumask_first(nodemask); + *nodemask = node_to_cpumask(cpu_to_node(cpu)); + cpus_and(*nodemask, *nodemask, *cpu_map); + group = first_cpu(*nodemask); if (sg) - *sg = &per_cpu(sched_group_allnodes, group).sg; + *sg = &per_cpu(sched_group_allnodes, group); return group; } @@ -7278,11 +7147,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) if (!sg) return; do { - for_each_cpu(j, sched_group_cpus(sg)) { + for_each_cpu_mask_nr(j, sg->cpumask) { struct sched_domain *sd; - sd = &per_cpu(phys_domains, j).sd; - if (j != cpumask_first(sched_group_cpus(sd->groups))) { + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { /* * Only add "power" once for each * physical package. 
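
For readers tracking the conversion: the hunks in this file swap the pointer-based struct cpumask API back to the older fixed-size cpumask_t API. A minimal sketch of the two calling styles, with hypothetical masks a, b and dst (illustrative only, not part of the patch):

	static void cpumask_api_sketch(void)
	{
		cpumask_t a = CPU_MASK_NONE, b = CPU_MASK_NONE, dst;

		cpu_set(0, a);
		cpu_set(0, b);
		cpu_set(1, b);

		cpus_and(dst, a, b);		/* old API: lvalue arguments */
		if (!cpus_empty(dst))
			printk(KERN_INFO "first cpu: %d\n", first_cpu(dst));

		cpumask_and(&dst, &a, &b);	/* new API: pointer arguments */
	}

The old macros are cheap but put NR_CPUS bits on the stack for every local mask, which is what the allmasks machinery restored below works around.
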
@@ -7299,12 +7168,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) #ifdef CONFIG_NUMA /* Free memory allocated for various sched_group structures */ -static void free_sched_groups(const struct cpumask *cpu_map, - struct cpumask *nodemask) +static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { int cpu, i; - for_each_cpu(cpu, cpu_map) { + for_each_cpu_mask_nr(cpu, *cpu_map) { struct sched_group **sched_group_nodes = sched_group_nodes_bycpu[cpu]; @@ -7313,11 +7181,10 @@ static void free_sched_groups(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; - /* FIXME: Use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, i); - cpus_and(*nodemask, *pnodemask, *cpu_map); - if (cpumask_empty(nodemask)) + *nodemask = node_to_cpumask(i); + cpus_and(*nodemask, *nodemask, *cpu_map); + if (cpus_empty(*nodemask)) continue; if (sg == NULL) @@ -7335,8 +7202,7 @@ static void free_sched_groups(const struct cpumask *cpu_map, } } #else /* !CONFIG_NUMA */ -static void free_sched_groups(const struct cpumask *cpu_map, - struct cpumask *nodemask) +static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { } #endif /* CONFIG_NUMA */ @@ -7362,7 +7228,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) WARN_ON(!sd || !sd->groups); - if (cpu != cpumask_first(sched_group_cpus(sd->groups))) + if (cpu != first_cpu(sd->groups->cpumask)) return; child = sd->child; @@ -7427,6 +7293,48 @@ SD_INIT_FUNC(CPU) SD_INIT_FUNC(MC) #endif +/* + * To minimize stack usage kmalloc room for cpumasks and share the + * space as the usage in build_sched_domains() dictates. Used only + * if the amount of space is significant. + */ +struct allmasks { + cpumask_t tmpmask; /* make this one first */ + union { + cpumask_t nodemask; + cpumask_t this_sibling_map; + cpumask_t this_core_map; + }; + cpumask_t send_covered; + +#ifdef CONFIG_NUMA + cpumask_t domainspan; + cpumask_t covered; + cpumask_t notcovered; +#endif +}; + +#if NR_CPUS > 128 +#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v +static inline void sched_cpumask_alloc(struct allmasks **masks) +{ + *masks = kmalloc(sizeof(**masks), GFP_KERNEL); +} +static inline void sched_cpumask_free(struct allmasks *masks) +{ + kfree(masks); +} +#else +#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v +static inline void sched_cpumask_alloc(struct allmasks **masks) +{ } +static inline void sched_cpumask_free(struct allmasks *masks) +{ } +#endif + +#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ + ((unsigned long)(a) + offsetof(struct allmasks, v)) + static int default_relax_domain_level = -1; static int __init setup_relax_domain_level(char *str) @@ -7466,38 +7374,17 @@ static void set_domain_attribute(struct sched_domain *sd, * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static int __build_sched_domains(const struct cpumask *cpu_map, +static int __build_sched_domains(const cpumask_t *cpu_map, struct sched_domain_attr *attr) { - int i, err = -ENOMEM; + int i; struct root_domain *rd; - cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered, - tmpmask; + SCHED_CPUMASK_DECLARE(allmasks); + cpumask_t *tmpmask; #ifdef CONFIG_NUMA - cpumask_var_t domainspan, covered, notcovered; struct sched_group **sched_group_nodes = NULL; int sd_allnodes = 0; - if (!alloc_cpumask_var(&domainspan, GFP_KERNEL)) - goto out; - if (!alloc_cpumask_var(&covered, 
GFP_KERNEL)) - goto free_domainspan; - if (!alloc_cpumask_var(&notcovered, GFP_KERNEL)) - goto free_covered; -#endif - - if (!alloc_cpumask_var(&nodemask, GFP_KERNEL)) - goto free_notcovered; - if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL)) - goto free_nodemask; - if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL)) - goto free_this_sibling_map; - if (!alloc_cpumask_var(&send_covered, GFP_KERNEL)) - goto free_this_core_map; - if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) - goto free_send_covered; - -#ifdef CONFIG_NUMA /* * Allocate the per-node list of sched groups */ @@ -7505,37 +7392,54 @@ static int __build_sched_domains(const struct cpumask *cpu_map, GFP_KERNEL); if (!sched_group_nodes) { printk(KERN_WARNING "Can not alloc sched group node list\n"); - goto free_tmpmask; + return -ENOMEM; } #endif rd = alloc_rootdomain(); if (!rd) { printk(KERN_WARNING "Cannot alloc root domain\n"); - goto free_sched_groups; +#ifdef CONFIG_NUMA + kfree(sched_group_nodes); +#endif + return -ENOMEM; } + /* get space for all scratch cpumask variables */ + sched_cpumask_alloc(&allmasks); + if (!allmasks) { + printk(KERN_WARNING "Cannot alloc cpumask array\n"); + kfree(rd); #ifdef CONFIG_NUMA - sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes; + kfree(sched_group_nodes); +#endif + return -ENOMEM; + } + + tmpmask = (cpumask_t *)allmasks; + + +#ifdef CONFIG_NUMA + sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; #endif /* * Set up domains for cpus specified by the cpu_map. */ - for_each_cpu(i, cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd = NULL, *p; + SCHED_CPUMASK_VAR(nodemask, allmasks); - /* FIXME: use cpumask_of_node */ *nodemask = node_to_cpumask(cpu_to_node(i)); cpus_and(*nodemask, *nodemask, *cpu_map); #ifdef CONFIG_NUMA - if (cpumask_weight(cpu_map) > - SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { + if (cpus_weight(*cpu_map) > + SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { sd = &per_cpu(allnodes_domains, i); SD_INIT(sd, ALLNODES); set_domain_attribute(sd, attr); - cpumask_copy(sched_domain_span(sd), cpu_map); + sd->span = *cpu_map; cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); p = sd; sd_allnodes = 1; @@ -7545,19 +7449,18 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = &per_cpu(node_domains, i); SD_INIT(sd, NODE); set_domain_attribute(sd, attr); - sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); + sched_domain_node_span(cpu_to_node(i), &sd->span); sd->parent = p; if (p) p->child = sd; - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; - sd = &per_cpu(phys_domains, i).sd; + sd = &per_cpu(phys_domains, i); SD_INIT(sd, CPU); set_domain_attribute(sd, attr); - cpumask_copy(sched_domain_span(sd), nodemask); + sd->span = *nodemask; sd->parent = p; if (p) p->child = sd; @@ -7565,12 +7468,11 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC p = sd; - sd = &per_cpu(core_domains, i).sd; + sd = &per_cpu(core_domains, i); SD_INIT(sd, MC); set_domain_attribute(sd, attr); - *sched_domain_span(sd) = cpu_coregroup_map(i); - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + sd->span = cpu_coregroup_map(i); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); @@ -7578,11 +7480,11 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_SMT p = sd; - sd =
&per_cpu(cpu_domains, i).sd; + sd = &per_cpu(cpu_domains, i); SD_INIT(sd, SIBLING); set_domain_attribute(sd, attr); - cpumask_and(sched_domain_span(sd), - &per_cpu(cpu_sibling_map, i), cpu_map); + sd->span = per_cpu(cpu_sibling_map, i); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); @@ -7591,10 +7493,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu(i, cpu_map) { - cpumask_and(this_sibling_map, - &per_cpu(cpu_sibling_map, i), cpu_map); - if (i != cpumask_first(this_sibling_map)) + for_each_cpu_mask_nr(i, *cpu_map) { + SCHED_CPUMASK_VAR(this_sibling_map, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + + *this_sibling_map = per_cpu(cpu_sibling_map, i); + cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map); + if (i != first_cpu(*this_sibling_map)) continue; init_sched_build_groups(this_sibling_map, cpu_map, @@ -7605,11 +7510,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC /* Set up multi-core groups */ - for_each_cpu(i, cpu_map) { - /* FIXME: Use cpu_coregroup_mask */ + for_each_cpu_mask_nr(i, *cpu_map) { + SCHED_CPUMASK_VAR(this_core_map, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + *this_core_map = cpu_coregroup_map(i); cpus_and(*this_core_map, *this_core_map, *cpu_map); - if (i != cpumask_first(this_core_map)) + if (i != first_cpu(*this_core_map)) continue; init_sched_build_groups(this_core_map, cpu_map, @@ -7620,10 +7527,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Set up physical groups */ for (i = 0; i < nr_node_ids; i++) { - /* FIXME: Use cpumask_of_node */ + SCHED_CPUMASK_VAR(nodemask, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + *nodemask = node_to_cpumask(i); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpumask_empty(nodemask)) + if (cpus_empty(*nodemask)) continue; init_sched_build_groups(nodemask, cpu_map, @@ -7634,6 +7543,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_NUMA /* Set up node groups */ if (sd_allnodes) { + SCHED_CPUMASK_VAR(send_covered, allmasks); + init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group, send_covered, tmpmask); @@ -7642,58 +7553,58 @@ static int __build_sched_domains(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { /* Set up node groups */ struct sched_group *sg, *prev; + SCHED_CPUMASK_VAR(nodemask, allmasks); + SCHED_CPUMASK_VAR(domainspan, allmasks); + SCHED_CPUMASK_VAR(covered, allmasks); int j; - /* FIXME: Use cpumask_of_node */ *nodemask = node_to_cpumask(i); - cpumask_clear(covered); + cpus_clear(*covered); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpumask_empty(nodemask)) { + if (cpus_empty(*nodemask)) { sched_group_nodes[i] = NULL; continue; } sched_domain_node_span(i, domainspan); - cpumask_and(domainspan, domainspan, cpu_map); + cpus_and(*domainspan, *domainspan, *cpu_map); - sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), - GFP_KERNEL, i); + sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING "Can not alloc domain group for " "node %d\n", i); goto error; } sched_group_nodes[i] = sg; - for_each_cpu(j, nodemask) { + for_each_cpu_mask_nr(j, *nodemask) { struct sched_domain *sd; sd = &per_cpu(node_domains, j); sd->groups = sg; } sg->__cpu_power = 0; - cpumask_copy(sched_group_cpus(sg), nodemask); + sg->cpumask = *nodemask; sg->next = sg; 
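/*
 * Aside (a sketch, not part of the patch): each SCHED_CPUMASK_VAR(covered,
 * allmasks) above expands to roughly
 *
 *	cpumask_t *covered = (cpumask_t *)
 *		((unsigned long)(allmasks) + offsetof(struct allmasks, covered));
 *
 * so every "local" mask is really a pointer into the single struct allmasks
 * scratch area -- kmalloc'ed when NR_CPUS > 128, on the stack otherwise --
 * and the union lets nodemask, this_sibling_map and this_core_map share one
 * slot, since they are never live at the same time.
 */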
- cpumask_or(covered, covered, nodemask); + cpus_or(*covered, *covered, *nodemask); prev = sg; for (j = 0; j < nr_node_ids; j++) { + SCHED_CPUMASK_VAR(notcovered, allmasks); int n = (i + j) % nr_node_ids; - /* FIXME: Use cpumask_of_node */ node_to_cpumask_ptr(pnodemask, n); - cpumask_complement(notcovered, covered); - cpumask_and(tmpmask, notcovered, cpu_map); - cpumask_and(tmpmask, tmpmask, domainspan); - if (cpumask_empty(tmpmask)) + cpus_complement(*notcovered, *covered); + cpus_and(*tmpmask, *notcovered, *cpu_map); + cpus_and(*tmpmask, *tmpmask, *domainspan); + if (cpus_empty(*tmpmask)) break; - cpumask_and(tmpmask, tmpmask, pnodemask); - if (cpumask_empty(tmpmask)) + cpus_and(*tmpmask, *tmpmask, *pnodemask); + if (cpus_empty(*tmpmask)) continue; - sg = kmalloc_node(sizeof(struct sched_group) + - cpumask_size(), + sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING @@ -7701,9 +7612,9 @@ static int __build_sched_domains(const struct cpumask *cpu_map, goto error; } sg->__cpu_power = 0; - cpumask_copy(sched_group_cpus(sg), tmpmask); + sg->cpumask = *tmpmask; sg->next = prev->next; - cpumask_or(covered, covered, tmpmask); + cpus_or(*covered, *covered, *tmpmask); prev->next = sg; prev = sg; } @@ -7712,22 +7623,22 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Calculate CPU power for physical packages and nodes */ #ifdef CONFIG_SCHED_SMT - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(cpu_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(cpu_domains, i); init_sched_groups_power(i, sd); } #endif #ifdef CONFIG_SCHED_MC - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(core_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(core_domains, i); init_sched_groups_power(i, sd); } #endif - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(phys_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(phys_domains, i); init_sched_groups_power(i, sd); } @@ -7739,78 +7650,53 @@ static int __build_sched_domains(const struct cpumask *cpu_map, if (sd_allnodes) { struct sched_group *sg; - cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg, + cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, tmpmask); init_numa_sched_groups_power(sg); } #endif /* Attach the domains */ - for_each_cpu(i, cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i).sd; + sd = &per_cpu(cpu_domains, i); #elif defined(CONFIG_SCHED_MC) - sd = &per_cpu(core_domains, i).sd; + sd = &per_cpu(core_domains, i); #else - sd = &per_cpu(phys_domains, i).sd; + sd = &per_cpu(phys_domains, i); #endif cpu_attach_domain(sd, rd, i); } - err = 0; - -free_tmpmask: - free_cpumask_var(tmpmask); -free_send_covered: - free_cpumask_var(send_covered); -free_this_core_map: - free_cpumask_var(this_core_map); -free_this_sibling_map: - free_cpumask_var(this_sibling_map); -free_nodemask: - free_cpumask_var(nodemask); -free_notcovered: -#ifdef CONFIG_NUMA - free_cpumask_var(notcovered); -free_covered: - free_cpumask_var(covered); -free_domainspan: - free_cpumask_var(domainspan); -out: -#endif - return err; - -free_sched_groups: -#ifdef CONFIG_NUMA - kfree(sched_group_nodes); -#endif - goto free_tmpmask; + sched_cpumask_free(allmasks); + return 0; #ifdef CONFIG_NUMA error: free_sched_groups(cpu_map, tmpmask); - free_rootdomain(rd); - goto free_tmpmask; + 
sched_cpumask_free(allmasks); + kfree(rd); + return -ENOMEM; #endif } -static int build_sched_domains(const struct cpumask *cpu_map) +static int build_sched_domains(const cpumask_t *cpu_map) { return __build_sched_domains(cpu_map, NULL); } -static struct cpumask *doms_cur; /* current sched domains */ +static cpumask_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; /* attribues of custom domains in 'doms_cur' */ /* * Special case: If a kmalloc of a doms_cur partition (array of - * cpumask) fails, then fallback to a single sched domain, - * as determined by the single cpumask fallback_doms. + * cpumask_t) fails, then fallback to a single sched domain, + * as determined by the single cpumask_t fallback_doms. */ -static cpumask_var_t fallback_doms; +static cpumask_t fallback_doms; /* * arch_update_cpu_topology lets virtualized architectures update the @@ -7827,16 +7713,16 @@ int __attribute__((weak)) arch_update_cpu_topology(void) * For now this just excludes isolated cpus, but could be used to * exclude other special cases in the future. */ -static int arch_init_sched_domains(const struct cpumask *cpu_map) +static int arch_init_sched_domains(const cpumask_t *cpu_map) { int err; arch_update_cpu_topology(); ndoms_cur = 1; - doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); + doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); if (!doms_cur) - doms_cur = fallback_doms; - cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); + doms_cur = &fallback_doms; + cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); dattr_cur = NULL; err = build_sched_domains(doms_cur); register_sched_domain_sysctl(); @@ -7844,8 +7730,8 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) return err; } -static void arch_destroy_sched_domains(const struct cpumask *cpu_map, - struct cpumask *tmpmask) +static void arch_destroy_sched_domains(const cpumask_t *cpu_map, + cpumask_t *tmpmask) { free_sched_groups(cpu_map, tmpmask); } @@ -7854,16 +7740,15 @@ static void arch_destroy_sched_domains(const struct cpumask *cpu_map, * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ -static void detach_destroy_domains(const struct cpumask *cpu_map) +static void detach_destroy_domains(const cpumask_t *cpu_map) { - /* Save because hotplug lock held. */ - static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); + cpumask_t tmpmask; int i; - for_each_cpu(i, cpu_map) + for_each_cpu_mask_nr(i, *cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); synchronize_sched(); - arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); + arch_destroy_sched_domains(cpu_map, &tmpmask); } /* handle null as "default" */ @@ -7888,7 +7773,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * doms_new[] to the current sched domain partitioning, doms_cur[]. * It destroys each deleted domain and builds each new domain. * - * 'doms_new' is an array of cpumask's of length 'ndoms_new'. + * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. * The masks don't intersect (don't overlap.) We should setup one * sched domain for each mask. CPUs not in any of the cpumasks will * not be load balanced. If the same cpumask appears both in the @@ -7902,14 +7787,13 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * the single partition 'fallback_doms', it also forces the domains * to be rebuilt. 
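 *
 * For illustration (a hypothetical caller, not taken from this patch),
 * a partition into two disjoint two-CPU domains would look like:
 *
 *	cpumask_t doms[2];
 *
 *	cpus_clear(doms[0]);
 *	cpu_set(0, doms[0]);
 *	cpu_set(1, doms[0]);
 *	cpus_clear(doms[1]);
 *	cpu_set(2, doms[1]);
 *	cpu_set(3, doms[1]);
 *	partition_sched_domains(2, doms, NULL);
 *
 * (subject to the hotplug-lock rule stated below).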
* - * If doms_new == NULL it will be replaced with cpu_online_mask. + * If doms_new == NULL it will be replaced with cpu_online_map. * ndoms_new == 0 is a special case for destroying existing domains, * and it will not create the default domain. * * Call with hotplug lock held */ -/* FIXME: Change to struct cpumask *doms_new[] */ -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { int i, j, n; @@ -7928,7 +7812,7 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(&doms_cur[i], &doms_new[j]) + if (cpus_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } @@ -7940,15 +7824,15 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, if (doms_new == NULL) { ndoms_cur = 0; - doms_new = fallback_doms; - cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); + doms_new = &fallback_doms; + cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); WARN_ON_ONCE(dattr_new); } /* Build new domains */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < ndoms_cur && !new_topology; j++) { - if (cpumask_equal(&doms_new[i], &doms_cur[j]) + if (cpus_equal(doms_new[i], doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } @@ -7960,7 +7844,7 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, } /* Remember the new sched domains */ - if (doms_cur != fallback_doms) + if (doms_cur != &fallback_doms) kfree(doms_cur); kfree(dattr_cur); /* kfree(NULL) is safe */ doms_cur = doms_new; @@ -7989,25 +7873,14 @@ int arch_reinit_sched_domains(void) static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) { int ret; - unsigned int level = 0; - if (sscanf(buf, "%u", &level) != 1) - return -EINVAL; - - /* - * level is always be positive so don't check for - * level < POWERSAVINGS_BALANCE_NONE which is 0 - * What happens on 0 or 1 byte write, - * need to check for count as well? 
- */ - - if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS) + if (buf[0] != '0' && buf[0] != '1') return -EINVAL; if (smt) - sched_smt_power_savings = level; + sched_smt_power_savings = (buf[0] == '1'); else - sched_mc_power_savings = level; + sched_mc_power_savings = (buf[0] == '1'); ret = arch_reinit_sched_domains(); @@ -8111,9 +7984,7 @@ static int update_runtime(struct notifier_block *nfb, void __init sched_init_smp(void) { - cpumask_var_t non_isolated_cpus; - - alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); + cpumask_t non_isolated_cpus; #if defined(CONFIG_NUMA) sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), @@ -8122,10 +7993,10 @@ void __init sched_init_smp(void) #endif get_online_cpus(); mutex_lock(&sched_domains_mutex); - arch_init_sched_domains(cpu_online_mask); - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - if (cpumask_empty(non_isolated_cpus)) - cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); + arch_init_sched_domains(&cpu_online_map); + cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); + if (cpus_empty(non_isolated_cpus)) + cpu_set(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); put_online_cpus(); @@ -8140,13 +8011,9 @@ void __init sched_init_smp(void) init_hrtick(); /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) + if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) BUG(); sched_init_granularity(); - free_cpumask_var(non_isolated_cpus); - - alloc_cpumask_var(&fallback_doms, GFP_KERNEL); - init_sched_rt_class(); } #else void __init sched_init_smp(void) @@ -8461,15 +8328,6 @@ void __init sched_init(void) */ current->sched_class = &fair_sched_class; - /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - alloc_bootmem_cpumask_var(&nohz_cpu_mask); -#ifdef CONFIG_SMP -#ifdef CONFIG_NO_HZ - alloc_bootmem_cpumask_var(&nohz.cpu_mask); -#endif - alloc_bootmem_cpumask_var(&cpu_isolated_map); -#endif /* SMP */ - scheduler_running = 1; } diff --git a/trunk/kernel/sched_cpupri.c b/trunk/kernel/sched_cpupri.c index 018b7be1db2e..52154fefab7e 100644 --- a/trunk/kernel/sched_cpupri.c +++ b/trunk/kernel/sched_cpupri.c @@ -67,21 +67,24 @@ static int convert_prio(int prio) * Returns: (int)bool - CPUs were found */ int cpupri_find(struct cpupri *cp, struct task_struct *p, - struct cpumask *lowest_mask) + cpumask_t *lowest_mask) { int idx = 0; int task_pri = convert_prio(p->prio); for_each_cpupri_active(cp->pri_active, idx) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; + cpumask_t mask; if (idx >= task_pri) break; - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) + cpus_and(mask, p->cpus_allowed, vec->mask); + + if (cpus_empty(mask)) continue; - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); + *lowest_mask = mask; return 1; } @@ -123,7 +126,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) vec->count--; if (!vec->count) clear_bit(oldpri, cp->pri_active); - cpumask_clear_cpu(cpu, vec->mask); + cpu_clear(cpu, vec->mask); spin_unlock_irqrestore(&vec->lock, flags); } @@ -133,7 +136,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) spin_lock_irqsave(&vec->lock, flags); - cpumask_set_cpu(cpu, vec->mask); + cpu_set(cpu, vec->mask); vec->count++; if (vec->count == 1) set_bit(newpri, cp->pri_active); @@ -147,11 +150,10 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) /** * cpupri_init - initialize the cpupri structure * @cp: The cpupri context - * @bootmem: true if allocations need 
to use bootmem * - * Returns: -ENOMEM if memory fails. + * Returns: (void) */ -int cpupri_init(struct cpupri *cp, bool bootmem) +void cpupri_init(struct cpupri *cp) { int i; @@ -162,30 +164,11 @@ int cpupri_init(struct cpupri *cp, bool bootmem) spin_lock_init(&vec->lock); vec->count = 0; - if (bootmem) - alloc_bootmem_cpumask_var(&vec->mask); - else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) - goto cleanup; + cpus_clear(vec->mask); } for_each_possible_cpu(i) cp->cpu_to_pri[i] = CPUPRI_INVALID; - return 0; - -cleanup: - for (i--; i >= 0; i--) - free_cpumask_var(cp->pri_to_cpu[i].mask); - return -ENOMEM; } -/** - * cpupri_cleanup - clean up the cpupri structure - * @cp: The cpupri context - */ -void cpupri_cleanup(struct cpupri *cp) -{ - int i; - for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) - free_cpumask_var(cp->pri_to_cpu[i].mask); -} diff --git a/trunk/kernel/sched_cpupri.h b/trunk/kernel/sched_cpupri.h index 642a94ef8a0a..f25811b0f931 100644 --- a/trunk/kernel/sched_cpupri.h +++ b/trunk/kernel/sched_cpupri.h @@ -14,7 +14,7 @@ struct cpupri_vec { spinlock_t lock; int count; - cpumask_var_t mask; + cpumask_t mask; }; struct cpupri { @@ -27,8 +27,7 @@ struct cpupri { int cpupri_find(struct cpupri *cp, struct task_struct *p, cpumask_t *lowest_mask); void cpupri_set(struct cpupri *cp, int cpu, int pri); -int cpupri_init(struct cpupri *cp, bool bootmem); -void cpupri_cleanup(struct cpupri *cp); +void cpupri_init(struct cpupri *cp); #else #define cpupri_set(cp, cpu, pri) do { } while (0) #define cpupri_init() do { } while (0) diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index 56c0efe902a7..5ad4440f0fc4 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -1019,33 +1019,16 @@ static void yield_task_fair(struct rq *rq) * search starts with cpus closest then further out as needed, * so we always favor a closer, idle cpu. * Domains may include CPUs that are not usable for migration, - * hence we need to mask them out (cpu_active_mask) + * hence we need to mask them out (cpu_active_map) * * Returns the CPU we should wake onto. */ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) static int wake_idle(int cpu, struct task_struct *p) { + cpumask_t tmp; struct sched_domain *sd; int i; - unsigned int chosen_wakeup_cpu; - int this_cpu; - - /* - * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu - * are idle and this is not a kernel thread and this task's affinity - * allows it to be moved to preferred cpu, then just move! - */ - - this_cpu = smp_processor_id(); - chosen_wakeup_cpu = - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; - - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && - idle_cpu(cpu) && idle_cpu(this_cpu) && - p->mm && !(p->flags & PF_KTHREAD) && - cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) - return chosen_wakeup_cpu; /* * If it is idle, then it is the best cpu to run this task. 
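
The wake_idle() hunk below restores the on-stack mask-intersection idiom: the candidate set is the domain span, narrowed by the task's affinity and by the active map. Condensed into a hypothetical helper (a sketch under the old API, not code from the patch):

	static int first_allowed_idle_cpu(struct sched_domain *sd,
					  struct task_struct *p)
	{
		cpumask_t tmp;
		int i;

		/* candidates = domain span & task affinity & active cpus */
		cpus_and(tmp, sd->span, p->cpus_allowed);
		cpus_and(tmp, tmp, cpu_active_map);

		for_each_cpu_mask_nr(i, tmp)
			if (idle_cpu(i))
				return i;

		return -1;	/* no idle candidate */
	}
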
@@ -1063,9 +1046,10 @@ static int wake_idle(int cpu, struct task_struct *p) if ((sd->flags & SD_WAKE_IDLE) || ((sd->flags & SD_WAKE_IDLE_FAR) && !task_hot(p, task_rq(p)->clock, sd))) { - for_each_cpu_and(i, sched_domain_span(sd), - &p->cpus_allowed) { - if (cpu_active(i) && idle_cpu(i)) { + cpus_and(tmp, sd->span, p->cpus_allowed); + cpus_and(tmp, tmp, cpu_active_map); + for_each_cpu_mask_nr(i, tmp) { + if (idle_cpu(i)) { if (i != task_cpu(p)) { schedstat_inc(p, se.nr_wakeups_idle); @@ -1258,13 +1242,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync) * this_cpu and prev_cpu are present in: */ for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { + if (cpu_isset(prev_cpu, sd->span)) { this_sd = sd; break; } } - if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) + if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) goto out; /* diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c index 833b6d44483c..51d2af3e6191 100644 --- a/trunk/kernel/sched_rt.c +++ b/trunk/kernel/sched_rt.c @@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq) if (!rq->online) return; - cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); + cpu_set(rq->cpu, rq->rd->rto_mask); /* * Make sure the mask is visible before we set * the overload count. That is checked to determine @@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq) /* the order here really doesn't matter */ atomic_dec(&rq->rd->rto_count); - cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); + cpu_clear(rq->cpu, rq->rd->rto_mask); } static void update_rt_migration(struct rq *rq) @@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) } #ifdef CONFIG_SMP -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { return cpu_rq(smp_processor_id())->rd->span; } #else -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { - return cpu_online_mask; + return cpu_online_map; } #endif @@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq) return rt_rq->rt_throttled; } -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { - return cpu_online_mask; + return cpu_online_map; } static inline @@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq) int i, weight, more = 0; u64 rt_period; - weight = cpumask_weight(rd->span); + weight = cpus_weight(rd->span); spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu(i, rd->span) { + for_each_cpu_mask_nr(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq) /* * Greedy reclaim, take back as much as we can. 
*/ - for_each_cpu(i, rd->span) { + for_each_cpu_mask(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq) static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) { int i, idle = 1; - const struct cpumask *span; + cpumask_t span; if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return 1; span = sched_rt_period_mask(); - for_each_cpu(i, span) { + for_each_cpu_mask(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); @@ -805,20 +805,17 @@ static int select_task_rq_rt(struct task_struct *p, int sync) static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - cpumask_var_t mask; + cpumask_t mask; if (rq->curr->rt.nr_cpus_allowed == 1) return; - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) - return; - if (p->rt.nr_cpus_allowed != 1 - && cpupri_find(&rq->rd->cpupri, p, mask)) - goto free; + && cpupri_find(&rq->rd->cpupri, p, &mask)) + return; - if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) - goto free; + if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) + return; /* * There appears to be other cpus that can accept @@ -827,8 +824,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) */ requeue_task_rt(rq, p, 1); resched_task(rq->curr); -free: - free_cpumask_var(mask); } #endif /* CONFIG_SMP */ @@ -919,7 +914,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && + (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && (p->rt.nr_cpus_allowed > 1)) return 1; return 0; @@ -958,7 +953,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) return next; } -static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); +static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) { @@ -978,7 +973,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); @@ -993,7 +988,7 @@ static int find_lowest_rq(struct task_struct *task) * I guess we might want to change cpupri_find() to ignore those * in the first place. */ - cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); + cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); /* * At this point we have built a mask of cpus representing the @@ -1003,7 +998,7 @@ static int find_lowest_rq(struct task_struct *task) * We prioritize the last cpu that the task executed on since * it is most likely cache-hot in that location. */ - if (cpumask_test_cpu(cpu, lowest_mask)) + if (cpu_isset(cpu, *lowest_mask)) return cpu; /* @@ -1018,8 +1013,7 @@ static int find_lowest_rq(struct task_struct *task) cpumask_t domain_mask; int best_cpu; - cpumask_and(&domain_mask, sched_domain_span(sd), - lowest_mask); + cpus_and(domain_mask, sd->span, *lowest_mask); best_cpu = pick_optimal_cpu(this_cpu, &domain_mask); @@ -1060,8 +1054,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. 
*/ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, - &task->cpus_allowed) || + !cpu_isset(lowest_rq->cpu, + task->cpus_allowed) || task_running(rq, task) || !task->se.on_rq)) { @@ -1182,7 +1176,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu(cpu, this_rq->rd->rto_mask) { + for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; @@ -1311,9 +1305,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, } static void set_cpus_allowed_rt(struct task_struct *p, - const struct cpumask *new_mask) + const cpumask_t *new_mask) { - int weight = cpumask_weight(new_mask); + int weight = cpus_weight(*new_mask); BUG_ON(!rt_task(p)); @@ -1334,7 +1328,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, update_rt_migration(rq); } - cpumask_copy(&p->cpus_allowed, new_mask); + p->cpus_allowed = *new_mask; p->rt.nr_cpus_allowed = weight; } @@ -1377,14 +1371,6 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p, if (!rq->rt.rt_nr_running) pull_rt_task(rq); } - -static inline void init_sched_rt_class(void) -{ - unsigned int i; - - for_each_possible_cpu(i) - alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL); -} #endif /* CONFIG_SMP */ /* @@ -1555,4 +1541,3 @@ static void print_rt_stats(struct seq_file *m, int cpu) rcu_read_unlock(); } #endif /* CONFIG_SCHED_DEBUG */ - diff --git a/trunk/kernel/sched_stats.h b/trunk/kernel/sched_stats.h index f2773b5d1226..3b01098164c8 100644 --- a/trunk/kernel/sched_stats.h +++ b/trunk/kernel/sched_stats.h @@ -42,8 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) for_each_domain(cpu, sd) { enum cpu_idle_type itype; - cpumask_scnprintf(mask_str, mask_len, - sched_domain_span(sd)); + cpumask_scnprintf(mask_str, mask_len, sd->span); seq_printf(seq, "domain%d %s", dcount++, mask_str); for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; itype++) { diff --git a/trunk/kernel/taskstats.c b/trunk/kernel/taskstats.c index 6d7dc4ec4aa5..bd6be76303cf 100644 --- a/trunk/kernel/taskstats.c +++ b/trunk/kernel/taskstats.c @@ -352,7 +352,7 @@ static int parse(struct nlattr *na, cpumask_t *mask) if (!data) return -ENOMEM; nla_strlcpy(data, na, len); - ret = cpulist_parse(data, mask); + ret = cpulist_parse(data, *mask); kfree(data); return ret; } diff --git a/trunk/kernel/time/clockevents.c b/trunk/kernel/time/clockevents.c index ea2f48af83cf..f8d968063cea 100644 --- a/trunk/kernel/time/clockevents.c +++ b/trunk/kernel/time/clockevents.c @@ -166,8 +166,6 @@ static void clockevents_notify_released(void) void clockevents_register_device(struct clock_event_device *dev) { BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); - BUG_ON(!dev->cpumask); - /* * A nsec2cyc multiplicator of 0 is invalid and we'd crash * on it, so fix it up and emit a warning: diff --git a/trunk/kernel/time/tick-broadcast.c b/trunk/kernel/time/tick-broadcast.c index 9590af2327be..f98a1b7b16e9 100644 --- a/trunk/kernel/time/tick-broadcast.c +++ b/trunk/kernel/time/tick-broadcast.c @@ -150,7 +150,7 @@ static void tick_do_broadcast(cpumask_t mask) */ cpu = first_cpu(mask); td = &per_cpu(tick_cpu_device, cpu); - td->evtdev->broadcast(&mask); + td->evtdev->broadcast(mask); } } diff --git a/trunk/kernel/time/tick-common.c b/trunk/kernel/time/tick-common.c index f8372be74122..df12434b43ca 100644 --- a/trunk/kernel/time/tick-common.c +++ b/trunk/kernel/time/tick-common.c @@ -136,7 +136,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) */ static void 
tick_setup_device(struct tick_device *td, struct clock_event_device *newdev, int cpu, - const struct cpumask *cpumask) + const cpumask_t *cpumask) { ktime_t next_event; void (*handler)(struct clock_event_device *) = NULL; @@ -171,8 +171,8 @@ static void tick_setup_device(struct tick_device *td, * When the device is not per cpu, pin the interrupt to the * current cpu: */ - if (!cpumask_equal(newdev->cpumask, cpumask)) - irq_set_affinity(newdev->irq, cpumask); + if (!cpus_equal(newdev->cpumask, *cpumask)) + irq_set_affinity(newdev->irq, *cpumask); /* * When global broadcasting is active, check if the current @@ -202,14 +202,14 @@ static int tick_check_new_device(struct clock_event_device *newdev) spin_lock_irqsave(&tick_device_lock, flags); cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, newdev->cpumask)) + if (!cpu_isset(cpu, newdev->cpumask)) goto out_bc; td = &per_cpu(tick_cpu_device, cpu); curdev = td->evtdev; /* cpu local device ? */ - if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { + if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) { /* * If the cpu affinity of the device interrupt can not @@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) * If we have a cpu local device already, do not replace it * by a non cpu local device */ - if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) + if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu))) goto out_bc; } diff --git a/trunk/kernel/time/tick-sched.c b/trunk/kernel/time/tick-sched.c index 76a574bbef97..8f3fc2582d38 100644 --- a/trunk/kernel/time/tick-sched.c +++ b/trunk/kernel/time/tick-sched.c @@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void) if (!ts->tick_stopped) return; - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); now = ktime_get(); ts->idle_waketime = now; @@ -301,7 +301,7 @@ void tick_nohz_stop_sched_tick(int inidle) tick_do_timer_cpu = TICK_DO_TIMER_NONE; if (delta_jiffies > 1) - cpumask_set_cpu(cpu, nohz_cpu_mask); + cpu_set(cpu, nohz_cpu_mask); /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) @@ -319,7 +319,7 @@ void tick_nohz_stop_sched_tick(int inidle) /* * sched tick not stopped! */ - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); goto out; } @@ -361,7 +361,7 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. */ tick_do_update_jiffies64(ktime_get()); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: @@ -439,7 +439,7 @@ void tick_nohz_restart_sched_tick(void) select_nohz_load_balancer(0); now = ktime_get(); tick_do_update_jiffies64(now); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); /* * We stopped the tick in idle. 
Update process times would miss the diff --git a/trunk/kernel/trace/trace.c b/trunk/kernel/trace/trace.c index 0e91f43b6baf..4185d5221633 100644 --- a/trunk/kernel/trace/trace.c +++ b/trunk/kernel/trace/trace.c @@ -2674,7 +2674,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@ -2695,7 +2695,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); if (err) goto err_unlock; diff --git a/trunk/lib/Kconfig b/trunk/lib/Kconfig index 2ba43c4a5b07..fd4118e097f0 100644 --- a/trunk/lib/Kconfig +++ b/trunk/lib/Kconfig @@ -159,11 +159,4 @@ config CHECK_SIGNATURE config HAVE_LMB boolean -config CPUMASK_OFFSTACK - bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS - help - Use dynamic allocation for cpumask_var_t, instead of putting - them on the stack. This is a bit more expensive, but avoids - stack overflow. - endmenu diff --git a/trunk/mm/slub.c b/trunk/mm/slub.c index 0d861c3154b6..6cb7ad107852 100644 --- a/trunk/mm/slub.c +++ b/trunk/mm/slub.c @@ -3642,7 +3642,7 @@ static int list_locations(struct kmem_cache *s, char *buf, len < PAGE_SIZE - 60) { len += sprintf(buf + len, " cpus="); len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, - &l->cpus); + l->cpus); } if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && diff --git a/trunk/virt/kvm/ioapic.c b/trunk/virt/kvm/ioapic.c index 23b81cf242af..53772bb46320 100644 --- a/trunk/virt/kvm/ioapic.c +++ b/trunk/virt/kvm/ioapic.c @@ -150,11 +150,10 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic, static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) { kvm_inject_nmi(vcpu); - kvm_vcpu_kick(vcpu); } -u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, - u8 dest_mode) +static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, + u8 dest_mode) { u32 mask = 0; int i; @@ -208,8 +207,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) "vector=%x trig_mode=%x\n", dest, dest_mode, delivery_mode, vector, trig_mode); - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, - dest_mode); + deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); if (!deliver_bitmask) { ioapic_debug("no target on destination\n"); return 0; diff --git a/trunk/virt/kvm/ioapic.h b/trunk/virt/kvm/ioapic.h index 49c9581d2586..cd7ae7691c9d 100644 --- a/trunk/virt/kvm/ioapic.h +++ b/trunk/virt/kvm/ioapic.h @@ -85,7 +85,5 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); -u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, - u8 dest_mode); #endif diff --git a/trunk/virt/kvm/irq_comm.c b/trunk/virt/kvm/irq_comm.c index aa5d1e5c497e..55ad76ee2d09 100644 --- a/trunk/virt/kvm/irq_comm.c +++ b/trunk/virt/kvm/irq_comm.c @@ -61,9 +61,10 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); } -void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian) +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + 
struct kvm_irq_ack_notifier *kian) { - hlist_del_init(&kian->link); + hlist_del(&kian->link); } /* The caller must hold kvm->lock mutex */ @@ -72,15 +73,11 @@ int kvm_request_irq_source_id(struct kvm *kvm) unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; int irq_source_id = find_first_zero_bit(bitmap, sizeof(kvm->arch.irq_sources_bitmap)); - if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); - return -EFAULT; - } - - ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); - set_bit(irq_source_id, bitmap); - + irq_source_id = -EFAULT; + } else + set_bit(irq_source_id, bitmap); return irq_source_id; } @@ -88,9 +85,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) { int i; - ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); - - if (irq_source_id < 0 || + if (irq_source_id <= 0 || irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); return; diff --git a/trunk/virt/kvm/kvm_main.c b/trunk/virt/kvm/kvm_main.c index fc6127cbea1f..a87f45edfae8 100644 --- a/trunk/virt/kvm/kvm_main.c +++ b/trunk/virt/kvm/kvm_main.c @@ -47,10 +47,6 @@ #include #include -#ifdef CONFIG_X86 -#include -#endif - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET #include "coalesced_mmio.h" #endif @@ -64,13 +60,10 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -static int msi2intx = 1; -module_param(msi2intx, bool, 0); - DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); -static cpumask_var_t cpus_hardware_enabled; +static cpumask_t cpus_hardware_enabled; struct kmem_cache *kvm_vcpu_cache; EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -82,60 +75,9 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); -static bool kvm_rebooting; +bool kvm_rebooting; #ifdef KVM_CAP_DEVICE_ASSIGNMENT - -#ifdef CONFIG_X86 -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) -{ - int vcpu_id; - struct kvm_vcpu *vcpu; - struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm); - int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK) - >> MSI_ADDR_DEST_ID_SHIFT; - int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK) - >> MSI_DATA_VECTOR_SHIFT; - int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.address_lo); - int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, - (unsigned long *)&dev->guest_msi.data); - int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.data); - u32 deliver_bitmask; - - BUG_ON(!ioapic); - - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, - dest_id, dest_mode); - /* IOAPIC delivery mode value is the same as MSI here */ - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); - if (vcpu != NULL) - kvm_apic_set_irq(vcpu, vector, trig_mode); - else - printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); - break; - case IOAPIC_FIXED: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) - kvm_apic_set_irq(vcpu, vector, trig_mode); - } - break; - default: - printk(KERN_INFO "kvm: unsupported MSI delivery mode\n"); - } -} -#else -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {} -#endif - static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head 
*head, int assigned_dev_id) { @@ -162,16 +104,9 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * finer-grained lock, update this */ mutex_lock(&assigned_dev->kvm->lock); - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - else if (assigned_dev->irq_requested_type & - KVM_ASSIGNED_DEV_GUEST_MSI) { - assigned_device_msi_dispatch(assigned_dev); - enable_irq(assigned_dev->host_irq); - assigned_dev->host_irq_disabled = false; - } + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); mutex_unlock(&assigned_dev->kvm->lock); kvm_put_kvm(assigned_dev->kvm); } @@ -182,12 +117,8 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) (struct kvm_assigned_dev_kernel *) dev_id; kvm_get_kvm(assigned_dev->kvm); - schedule_work(&assigned_dev->interrupt_work); - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; - return IRQ_HANDLED; } @@ -201,32 +132,19 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) dev = container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - - /* The guest irq may be shared so this ack may be - * from another device. - */ - if (dev->host_irq_disabled) { - enable_irq(dev->host_irq); - dev->host_irq_disabled = false; - } + enable_irq(dev->host_irq); } -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) { - if (!irqchip_in_kernel(kvm)) - return; - - kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); + if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) + free_irq(assigned_dev->host_irq, (void *)assigned_dev); - if (assigned_dev->irq_source_id != -1) - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); - assigned_dev->irq_source_id = -1; - - if (!assigned_dev->irq_requested_type) - return; + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); if (cancel_work_sync(&assigned_dev->interrupt_work)) /* We had pending work. 
That means we will have to take @@ -234,23 +152,6 @@ static void kvm_free_assigned_irq(struct kvm *kvm, */ kvm_put_kvm(kvm); - free_irq(assigned_dev->host_irq, (void *)assigned_dev); - - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) - pci_disable_msi(assigned_dev->dev); - - assigned_dev->irq_requested_type = 0; -} - - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - kvm_free_assigned_irq(kvm, assigned_dev); - - pci_reset_function(assigned_dev->dev); - pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); pci_dev_put(assigned_dev->dev); @@ -273,95 +174,6 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) } } -static int assigned_device_update_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) -{ - adev->guest_irq = airq->guest_irq; - adev->ack_notifier.gsi = airq->guest_irq; - - if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) - return 0; - - if (irqchip_in_kernel(kvm)) { - if (!msi2intx && - adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) { - free_irq(adev->host_irq, (void *)kvm); - pci_disable_msi(adev->dev); - } - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (airq->host_irq) - adev->host_irq = airq->host_irq; - else - adev->host_irq = adev->dev->irq; - - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(adev->host_irq, kvm_assigned_dev_intr, - 0, "kvm_assigned_intx_device", (void *)adev)) - return -EIO; - } - - adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX | - KVM_ASSIGNED_DEV_HOST_INTX; - return 0; -} - -#ifdef CONFIG_X86 -static int assigned_device_update_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) -{ - int r; - - if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { - /* x86 don't care upper address of guest msi message addr */ - adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; - adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; - adev->guest_msi.address_lo = airq->guest_msi.addr_lo; - adev->guest_msi.data = airq->guest_msi.data; - adev->ack_notifier.gsi = -1; - } else if (msi2intx) { - adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; - adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; - adev->guest_irq = airq->guest_irq; - adev->ack_notifier.gsi = airq->guest_irq; - } - - if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) - return 0; - - if (irqchip_in_kernel(kvm)) { - if (!msi2intx) { - if (adev->irq_requested_type & - KVM_ASSIGNED_DEV_HOST_INTX) - free_irq(adev->host_irq, (void *)adev); - - r = pci_enable_msi(adev->dev); - if (r) - return r; - } - - adev->host_irq = adev->dev->irq; - if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_msi_device", (void *)adev)) - return -EIO; - } - - if (!msi2intx) - adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; - - adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; - return 0; -} -#endif - static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, struct kvm_assigned_irq *assigned_irq) @@ -378,68 +190,49 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, return -EINVAL; } - if (!match->irq_requested_type) { - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); - if (irqchip_in_kernel(kvm)) { - /* Register ack 
nofitier */
-			match->ack_notifier.gsi = -1;
-			match->ack_notifier.irq_acked =
-					kvm_assigned_dev_ack_irq;
-			kvm_register_irq_ack_notifier(kvm,
-					&match->ack_notifier);
-
-			/* Request IRQ source ID */
-			r = kvm_request_irq_source_id(kvm);
-			if (r < 0)
-				goto out_release;
-			else
-				match->irq_source_id = r;
-
-#ifdef CONFIG_X86
-			/* Determine host device irq type, we can know the
-			 * result from dev->msi_enabled */
-			if (msi2intx)
-				pci_enable_msi(match->dev);
-#endif
-		}
+	if (match->irq_requested) {
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		mutex_unlock(&kvm->lock);
+		return 0;
 	}
 
-	if ((!msi2intx &&
-	     (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
-	    (msi2intx && match->dev->msi_enabled)) {
-#ifdef CONFIG_X86
-		r = assigned_device_update_msi(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"MSI device!\n");
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
+
+	if (irqchip_in_kernel(kvm)) {
+		if (!capable(CAP_SYS_RAWIO)) {
+			r = -EPERM;
 			goto out_release;
 		}
-#else
-		r = -ENOTTY;
-#endif
-	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
-		/* Host device IRQ 0 means don't support INTx */
-		if (!msi2intx) {
-			printk(KERN_WARNING
-			       "kvm: wait device to enable MSI!\n");
-			r = 0;
-		} else {
-			printk(KERN_WARNING
-			       "kvm: failed to enable MSI device!\n");
-			r = -ENOTTY;
+
+		if (assigned_irq->host_irq)
+			match->host_irq = assigned_irq->host_irq;
+		else
+			match->host_irq = match->dev->irq;
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+		r = kvm_request_irq_source_id(kvm);
+		if (r < 0)
 			goto out_release;
-		}
-	} else {
-		/* Non-sharing INTx mode */
-		r = assigned_device_update_intx(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"INTx device!\n");
+		else
+			match->irq_source_id = r;
+
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
+				"kvm_assigned_device", (void *)match)) {
+			r = -EIO;
 			goto out_release;
 		}
 	}
+	match->irq_requested = true;
 
 	mutex_unlock(&kvm->lock);
 	return r;
 out_release:
@@ -490,14 +283,11 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 			__func__);
 		goto out_disable;
 	}
-
-	pci_reset_function(dev);
-
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
 	match->dev = dev;
-	match->irq_source_id = -1;
+
 	match->kvm = kvm;
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
@@ -565,49 +355,58 @@ static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
 	int i, cpu, me;
-	cpumask_var_t cpus;
-	bool called = true;
+	cpumask_t cpus;
 	struct kvm_vcpu *vcpu;
 
-	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
-		cpumask_clear(cpus);
-
 	me = get_cpu();
+	cpus_clear(cpus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
 		if (!vcpu)
 			continue;
-		if (test_and_set_bit(req, &vcpu->requests))
+		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
-		if (cpus != NULL && cpu != -1 && cpu != me)
-			cpumask_set_cpu(cpu, cpus);
+		if (cpu != -1 && cpu != me)
+			cpu_set(cpu, cpus);
 	}
-	if (unlikely(cpus == NULL))
-		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
-	else if (!cpumask_empty(cpus))
-		smp_call_function_many(cpus, ack_flush, NULL, 1);
-	else
-		called = false;
+	if (cpus_empty(cpus))
+		goto out;
+	++kvm->stat.remote_tlb_flush;
+	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
 	put_cpu();
-	free_cpumask_var(cpus);
-	return called;
-}
-
-void kvm_flush_remote_tlbs(struct kvm *kvm)
-{
-	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
-		++kvm->stat.remote_tlb_flush;
 }
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	int i, cpu, me;
+	cpumask_t cpus;
+	struct kvm_vcpu *vcpu;
+
+	me = get_cpu();
+	cpus_clear(cpus);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
+		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+			continue;
+		cpu = vcpu->cpu;
+		if (cpu != -1 && cpu != me)
+			cpu_set(cpu, cpus);
+	}
+	if (cpus_empty(cpus))
+		goto out;
+	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+	put_cpu();
 }
 
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
@@ -911,8 +710,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out;
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		goto out;
-	if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
-		goto out;
 	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 		goto out;
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
@@ -1024,10 +821,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out_free;
 	}
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	if (!npages)
-		*memslot = old;
+	kvm_free_physmem_slot(&old, &new);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
 	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -1124,7 +918,7 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
+static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 
@@ -1137,12 +931,11 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
 
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	gfn = unalias_gfn(kvm, gfn);
-	return gfn_to_memslot_unaliased(kvm, gfn);
+	return __gfn_to_memslot(kvm, gfn);
 }
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@@ -1166,7 +959,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *slot;
 
 	gfn = unalias_gfn(kvm, gfn);
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = __gfn_to_memslot(kvm, gfn);
 	if (!slot)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
@@ -1417,7 +1210,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	gfn = unalias_gfn(kvm, gfn);
-	memslot = gfn_to_memslot_unaliased(kvm, gfn);
+	memslot = __gfn_to_memslot(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
@@ -1502,7 +1295,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static struct file_operations kvm_vcpu_fops = {
+static const struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
 	.compat_ioctl   = kvm_vcpu_ioctl,
@@ -1896,7 +1689,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static struct file_operations kvm_vm_fops = {
+static const struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 	.compat_ioctl   = kvm_vm_ioctl,
@@ -1918,18 +1711,6 @@ static int kvm_dev_ioctl_create_vm(void)
 	return fd;
 }
 
-static long kvm_dev_ioctl_check_extension_generic(long arg)
-{
-	switch (arg) {
-	case KVM_CAP_USER_MEMORY:
-	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
-		return 1;
-	default:
-		break;
-	}
-	return kvm_dev_ioctl_check_extension(arg);
-}
-
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
@@ -1949,7 +1730,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm();
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension_generic(arg);
+		r = kvm_dev_ioctl_check_extension(arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;
@@ -1990,9 +1771,9 @@ static void hardware_enable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 
-	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
+	if (cpu_isset(cpu, cpus_hardware_enabled))
 		return;
-	cpumask_set_cpu(cpu, cpus_hardware_enabled);
+	cpu_set(cpu, cpus_hardware_enabled);
 	kvm_arch_hardware_enable(NULL);
 }
 
@@ -2000,9 +1781,9 @@ static void hardware_disable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 
-	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
+	if (!cpu_isset(cpu, cpus_hardware_enabled))
 		return;
-	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
+	cpu_clear(cpu, cpus_hardware_enabled);
 	kvm_arch_hardware_disable(NULL);
 }
 
@@ -2236,14 +2017,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	bad_pfn = page_to_pfn(bad_page);
 
-	if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
-		r = -ENOMEM;
-		goto out_free_0;
-	}
-
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
-		goto out_free_0a;
+		goto out_free_0;
 
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu,
@@ -2277,8 +2053,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	}
 
 	kvm_chardev_ops.owner = module;
-	kvm_vm_fops.owner = module;
-	kvm_vcpu_fops.owner = module;
 
 	r = misc_register(&kvm_dev);
 	if (r) {
@@ -2288,9 +2062,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
-#ifndef CONFIG_X86
-	msi2intx = 0;
-#endif
 
 	return 0;
 
@@ -2307,8 +2078,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
-out_free_0a:
-	free_cpumask_var(cpus_hardware_enabled);
out_free_0:
 	__free_page(bad_page);
 out:
@@ -2332,7 +2101,6 @@ void kvm_exit(void)
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	kvm_exit_debug();
-	free_cpumask_var(cpus_hardware_enabled);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/trunk/virt/kvm/kvm_trace.c b/trunk/virt/kvm/kvm_trace.c
index f59874446440..41dcc845f78c 100644
--- a/trunk/virt/kvm/kvm_trace.c
+++ b/trunk/virt/kvm/kvm_trace.c
@@ -252,7 +252,6 @@ void kvm_trace_cleanup(void)
 			struct kvm_trace_probe *p = &kvm_trace_probes[i];
 			marker_probe_unregister(p->name, p->probe_func, p);
 		}
-		marker_synchronize_unregister();
 		relay_close(kt->rchan);
 		debugfs_remove(kt->lost_file);