diff --git a/[refs] b/[refs]
index c506c8b1c7d1..f2914b0f1cf0 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 7eb19553369c46cc1fa64caf120cbcab1b597f7c
+refs/heads/master: 0999769e6cad9b0e5abb7c513c0c3f16821f0884
diff --git a/trunk/Documentation/cpu-hotplug.txt b/trunk/Documentation/cpu-hotplug.txt
index 9d620c153b04..94bbc27ddd4f 100644
--- a/trunk/Documentation/cpu-hotplug.txt
+++ b/trunk/Documentation/cpu-hotplug.txt
@@ -50,17 +50,16 @@ additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets
             cpu_possible_map = cpu_present_map + additional_cpus
 
 (*) Option valid only for following architectures
-- ia64
+- x86_64, ia64
 
-ia64 uses the number of disabled local apics in ACPI tables MADT to
-determine the number of potentially hot-pluggable cpus. The implementation
-should only rely on this to count the # of cpus, but *MUST* not rely
-on the apicid values in those tables for disabled apics. In the event
-BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
-use this parameter "additional_cpus=x" to represent those cpus in the
-cpu_possible_map.
+ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
+to determine the number of potentially hot-pluggable cpus. The implementation
+should only rely on this to count the # of cpus, but *MUST* not rely on the
+apicid values in those tables for disabled apics. In the event BIOS doesn't
+mark such hot-pluggable cpus as disabled entries, one could use this
+parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map.
 
-possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus.
+possible_cpus=n [s390 only] use this to set hotpluggable cpus.
             This option sets possible_cpus bits in
             cpu_possible_map. Thus keeping the numbers of bits set
             constant even if the machine gets rebooted.
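The hunk above boils down to one piece of boot-time arithmetic: cpu_possible_map = cpu_present_map + additional_cpus, with additional_cpus defaulting to the number of disabled local APIC entries found in the MADT. As a rough, self-contained sketch of that sizing step (toy values and a one-word bitmap stand-in; the real logic lives in prefill_possible_map(), declared later in this patch, and uses the kernel's cpumask primitives):

	#include <stdio.h>

	#define NR_CPUS 8			/* toy value for the sketch */

	static int num_processors = 2;		/* enabled local APICs in the MADT */
	static int disabled_cpus = 2;		/* disabled (hot-pluggable) entries */
	static int additional_cpus = -1;	/* -1: no additional_cpus= given */

	static unsigned long cpu_possible_map;	/* one-word stand-in for the bitmap */

	static void prefill_possible_map_sketch(void)
	{
		int i, possible;

		if (additional_cpus == -1)	/* default: trust the MADT count */
			additional_cpus = disabled_cpus;

		possible = num_processors + additional_cpus;
		if (possible > NR_CPUS)		/* never exceed the compiled-in cap */
			possible = NR_CPUS;

		for (i = 0; i < possible; i++)
			cpu_possible_map |= 1UL << i;
	}

	int main(void)
	{
		prefill_possible_map_sketch();
		printf("cpu_possible_map = %#lx\n", cpu_possible_map);	/* 0xf */
		return 0;
	}

Booting a two-CPU box with additional_cpus=2 would likewise leave two spare bits set in cpu_possible_map as hotplug slots.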
diff --git a/trunk/arch/cris/include/asm/bitops.h b/trunk/arch/cris/include/asm/bitops.h index c0e62f811e09..9e69cfb7f134 100644 --- a/trunk/arch/cris/include/asm/bitops.h +++ b/trunk/arch/cris/include/asm/bitops.h @@ -148,6 +148,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) #define ffs kernel_ffs #include +#include #include #include #include diff --git a/trunk/arch/ia64/include/asm/topology.h b/trunk/arch/ia64/include/asm/topology.h index 76a33a91ca69..97ae7f509109 100644 --- a/trunk/arch/ia64/include/asm/topology.h +++ b/trunk/arch/ia64/include/asm/topology.h @@ -56,6 +56,7 @@ void build_cpu_to_node_map(void); #define SD_CPU_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -80,6 +81,7 @@ void build_cpu_to_node_map(void); /* sched_domains SD_NODE_INIT for IA64 NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/mips/include/asm/mach-ip27/topology.h b/trunk/arch/mips/include/asm/mach-ip27/topology.h index 55d481569a1f..c1c3f5b2f18f 100644 --- a/trunk/arch/mips/include/asm/mach-ip27/topology.h +++ b/trunk/arch/mips/include/asm/mach-ip27/topology.h @@ -39,6 +39,7 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; /* sched_domains SD_NODE_INIT for SGI IP27 machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/powerpc/include/asm/topology.h b/trunk/arch/powerpc/include/asm/topology.h index 375258559ae6..236dae1cd29f 100644 --- a/trunk/arch/powerpc/include/asm/topology.h +++ b/trunk/arch/powerpc/include/asm/topology.h @@ -52,6 +52,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/sh/include/asm/topology.h b/trunk/arch/sh/include/asm/topology.h index 066f0fba590e..9aa160d0efe5 100644 --- a/trunk/arch/sh/include/asm/topology.h +++ b/trunk/arch/sh/include/asm/topology.h @@ -5,6 +5,7 @@ /* sched_domains SD_NODE_INIT for sh machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 249d1e0824b5..0f44add3e0b7 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -601,20 +601,19 @@ config IOMMU_HELPER config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL - select CPUMASK_OFFSTACK + depends on X86_64 && SMP && BROKEN default n help Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. config NR_CPUS - int "Maximum number of CPUs" if SMP && !MAXSMP - range 2 512 if SMP && !MAXSMP - default "1" if !SMP + int "Maximum number of CPUs (2-512)" if !MAXSMP + range 2 512 + depends on SMP default "4096" if MAXSMP - default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) - default "8" if SMP + default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 + default "8" help This allows you to specify the maximum number of CPUs which this kernel will support. 
The maximum supported value is 512 and the diff --git a/trunk/arch/x86/include/asm/bigsmp/apic.h b/trunk/arch/x86/include/asm/bigsmp/apic.h index d8dd9f537911..ce547f24a1cd 100644 --- a/trunk/arch/x86/include/asm/bigsmp/apic.h +++ b/trunk/arch/x86/include/asm/bigsmp/apic.h @@ -9,12 +9,12 @@ static inline int apic_id_registered(void) return (1); } -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { #ifdef CONFIG_SMP - return &cpu_online_map; + return cpu_online_map; #else - return &cpumask_of_cpu(0); + return cpumask_of_cpu(0); #endif } @@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < nr_cpu_ids) + if (mps_cpu < NR_CPUS) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); return BAD_APICID; @@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= nr_cpu_ids) + if (cpu >= NR_CPUS) return BAD_APICID; return cpu_physical_id(cpu); } @@ -119,34 +119,16 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; int apicid; - cpu = first_cpu(*cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return cpu_to_logical_apicid(cpu); - - return BAD_APICID; -} - static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/trunk/arch/x86/include/asm/bigsmp/ipi.h b/trunk/arch/x86/include/asm/bigsmp/ipi.h index 27fcd01b3ae6..9404c535b7ec 100644 --- a/trunk/arch/x86/include/asm/bigsmp/ipi.h +++ b/trunk/arch/x86/include/asm/bigsmp/ipi.h @@ -1,22 +1,25 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/trunk/arch/x86/include/asm/desc.h b/trunk/arch/x86/include/asm/desc.h index dc27705f5443..e6b82b17b072 100644 --- a/trunk/arch/x86/include/asm/desc.h +++ b/trunk/arch/x86/include/asm/desc.h @@ -320,14 +320,16 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } +#define SYS_VECTOR_FREE 0 +#define SYS_VECTOR_ALLOCED 1 + extern int 
first_system_vector; -/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ -extern unsigned long used_vectors[]; +extern char system_vectors[]; static inline void alloc_system_vector(int vector) { - if (!test_bit(vector, used_vectors)) { - set_bit(vector, used_vectors); + if (system_vectors[vector] == SYS_VECTOR_FREE) { + system_vectors[vector] = SYS_VECTOR_ALLOCED; if (first_system_vector > vector) first_system_vector = vector; } else diff --git a/trunk/arch/x86/include/asm/es7000/apic.h b/trunk/arch/x86/include/asm/es7000/apic.h index 51ac1230294e..e24ef876915f 100644 --- a/trunk/arch/x86/include/asm/es7000/apic.h +++ b/trunk/arch/x86/include/asm/es7000/apic.h @@ -9,14 +9,14 @@ static inline int apic_id_registered(void) return (1); } -static inline const cpumask_t *target_cpus_cluster(void) +static inline cpumask_t target_cpus_cluster(void) { - return &CPU_MASK_ALL; + return CPU_MASK_ALL; } -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { - return &cpumask_of_cpu(smp_processor_id()); + return cpumask_of_cpu(smp_processor_id()); } #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) @@ -80,10 +80,9 @@ extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*target_cpus())[0]); + "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) @@ -101,7 +100,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) + else if (mps_cpu < NR_CPUS) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -121,9 +120,9 @@ extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif @@ -147,15 +146,14 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) return (1); } -static inline unsigned int -cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpumask_weight(cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return 0xFF; @@ -163,10 +161,10 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. 
*/ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -181,14 +179,14 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(*cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return cpu_to_logical_apicid(0); @@ -196,52 +194,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. 
- */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -253,8 +209,6 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, } cpu++; } -exit: - free_cpumask_var(cpumask); return apicid; } diff --git a/trunk/arch/x86/include/asm/es7000/ipi.h b/trunk/arch/x86/include/asm/es7000/ipi.h index 7e8ed24d4b8a..632a955fcc0a 100644 --- a/trunk/arch/x86/include/asm/es7000/ipi.h +++ b/trunk/arch/x86/include/asm/es7000/ipi.h @@ -1,22 +1,24 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/trunk/arch/x86/include/asm/genapic_32.h b/trunk/arch/x86/include/asm/genapic_32.h index 746f37a7963a..0ac17d33a8c7 100644 --- a/trunk/arch/x86/include/asm/genapic_32.h +++ b/trunk/arch/x86/include/asm/genapic_32.h @@ -24,7 +24,7 @@ struct genapic { int (*probe)(void); int (*apic_id_registered)(void); - const struct cpumask *(*target_cpus)(void); + cpumask_t (*target_cpus)(void); int int_delivery_mode; int int_dest_mode; int ESR_DISABLE; @@ -57,16 +57,12 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + cpumask_t (*vector_allocation_domain)(int cpu); #ifdef CONFIG_SMP /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); + void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif @@ -118,7 +114,6 @@ struct genapic { APICFUNC(get_apic_id) \ .apic_id_mask = APIC_ID_MASK, \ APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(cpu_mask_to_apicid_and) \ APICFUNC(vector_allocation_domain) \ APICFUNC(acpi_madt_oem_check) \ IPIFUNC(send_IPI_mask) \ diff --git a/trunk/arch/x86/include/asm/genapic_64.h b/trunk/arch/x86/include/asm/genapic_64.h index adf32fb56aa6..2cae011668b7 100644 --- a/trunk/arch/x86/include/asm/genapic_64.h +++ b/trunk/arch/x86/include/asm/genapic_64.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H -#include - /* * Copyright 2004 James Cleverdon, IBM. 
* Subject to the GNU Public License, v.2 @@ -20,20 +18,16 @@ struct genapic { u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); - const struct cpumask *(*target_cpus)(void); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + cpumask_t (*target_cpus)(void); + cpumask_t (*vector_allocation_domain)(int cpu); void (*init_apic_ldr)(void); /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); + void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); /* */ - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/trunk/arch/x86/include/asm/ipi.h b/trunk/arch/x86/include/asm/ipi.h index c745a306f7d3..f89dffb28aa9 100644 --- a/trunk/arch/x86/include/asm/ipi.h +++ b/trunk/arch/x86/include/asm/ipi.h @@ -117,8 +117,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(const struct cpumask *mask, - int vector) +static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; @@ -129,29 +128,11 @@ static inline void send_IPI_mask_sequence(const struct cpumask *mask, * - mbligh */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static inline void send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); -} - #endif /* _ASM_X86_IPI_H */ diff --git a/trunk/arch/x86/include/asm/irq.h b/trunk/arch/x86/include/asm/irq.h index 592688ed04d3..28e409fc73f3 100644 --- a/trunk/arch/x86/include/asm/irq.h +++ b/trunk/arch/x86/include/asm/irq.h @@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include -extern void fixup_irqs(void); +extern void fixup_irqs(cpumask_t map); #endif extern unsigned int do_IRQ(struct pt_regs *regs); @@ -42,6 +42,5 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); -extern int vector_used_by_percpu_irq(unsigned int vector); #endif /* _ASM_X86_IRQ_H */ diff --git a/trunk/arch/x86/include/asm/mach-default/mach_apic.h b/trunk/arch/x86/include/asm/mach-default/mach_apic.h index cc09cbbee27e..6cb3a467e067 100644 --- a/trunk/arch/x86/include/asm/mach-default/mach_apic.h +++ b/trunk/arch/x86/include/asm/mach-default/mach_apic.h @@ -8,12 +8,12 @@ #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline const struct cpumask *target_cpus(void) +static inline cpumask_t target_cpus(void) { #ifdef CONFIG_SMP - return cpu_online_mask; + return cpu_online_map; #else - 
return cpumask_of(0); + return cpumask_of_cpu(0); #endif } @@ -28,7 +28,6 @@ static inline const struct cpumask *target_cpus(void) #define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) @@ -62,19 +61,9 @@ static inline int apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } -static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { - return cpumask_bits(cpumask)[0]; -} - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - unsigned long mask1 = cpumask_bits(cpumask)[0]; - unsigned long mask2 = cpumask_bits(andmask)[0]; - unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; - - return (unsigned int)(mask1 & mask2 & mask3); + return cpus_addr(cpumask)[0]; } static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) @@ -99,7 +88,7 @@ static inline int apicid_to_node(int logical_apicid) #endif } -static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) +static inline cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -109,7 +98,8 @@ static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. 
*/ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } #endif @@ -141,7 +131,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; diff --git a/trunk/arch/x86/include/asm/mach-default/mach_ipi.h b/trunk/arch/x86/include/asm/mach-default/mach_ipi.h index 191312d155da..fabca01ebacf 100644 --- a/trunk/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/trunk/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,8 +4,7 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_bitmask(cpumask_t mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; @@ -13,27 +12,28 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include #define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_bitmask(mask, vector); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __local_send_IPI_allbutself(int vector) { - if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask_allbutself(cpu_online_mask, vector); - else + if (no_broadcast || vector == NMI_VECTOR) { + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + send_IPI_mask(mask, vector); + } else __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void __local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector); } diff --git a/trunk/arch/x86/include/asm/mach-generic/mach_apic.h b/trunk/arch/x86/include/asm/mach-generic/mach_apic.h index 48553e958ad5..e430f47df667 100644 --- a/trunk/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/trunk/arch/x86/include/asm/mach-generic/mach_apic.h @@ -24,7 +24,6 @@ #define check_phys_apicid_present (genapic->check_phys_apicid_present) #define check_apicid_used (genapic->check_apicid_used) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define vector_allocation_domain (genapic->vector_allocation_domain) #define enable_apic_mode (genapic->enable_apic_mode) #define phys_pkg_id (genapic->phys_pkg_id) diff --git a/trunk/arch/x86/include/asm/numaq/apic.h b/trunk/arch/x86/include/asm/numaq/apic.h index c80f00d29965..0bf2a06b7a4e 100644 --- a/trunk/arch/x86/include/asm/numaq/apic.h +++ b/trunk/arch/x86/include/asm/numaq/apic.h @@ -7,9 +7,9 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { - return &CPU_MASK_ALL; + return CPU_MASK_ALL; } #define NO_BALANCE_IRQ (1) @@ -122,13 +122,7 @@ static inline void enable_apic_mode(void) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - 
return (int) 0xF; -} - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { return (int) 0xF; } diff --git a/trunk/arch/x86/include/asm/numaq/ipi.h b/trunk/arch/x86/include/asm/numaq/ipi.h index a8374c652778..935588d286cf 100644 --- a/trunk/arch/x86/include/asm/numaq/ipi.h +++ b/trunk/arch/x86/include/asm/numaq/ipi.h @@ -1,22 +1,25 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/trunk/arch/x86/include/asm/smp.h b/trunk/arch/x86/include/asm/smp.h index 830b9fcb6427..d12811ce51d9 100644 --- a/trunk/arch/x86/include/asm/smp.h +++ b/trunk/arch/x86/include/asm/smp.h @@ -60,7 +60,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(const struct cpumask *mask); + void (*send_call_func_ipi)(cpumask_t mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) static inline void arch_send_call_function_ipi(cpumask_t mask) { - smp_ops.send_call_func_ipi(&mask); + smp_ops.send_call_func_ipi(mask); } void cpu_disable_common(void); @@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void native_send_call_func_ipi(const struct cpumask *mask); +void native_send_call_func_ipi(cpumask_t mask); void native_send_call_func_single_ipi(int cpu); extern void prefill_possible_map(void); diff --git a/trunk/arch/x86/include/asm/summit/apic.h b/trunk/arch/x86/include/asm/summit/apic.h index 99327d1be49f..9b3070f1c2ac 100644 --- a/trunk/arch/x86/include/asm/summit/apic.h +++ b/trunk/arch/x86/include/asm/summit/apic.h @@ -14,13 +14,13 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing * Just start on cpu 0. IRQ balancing will spread load */ - return &cpumask_of_cpu(0); + return cpumask_of_cpu(0); } #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -137,14 +137,14 @@ static inline void enable_apic_mode(void) { } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(*cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return (int) 0xFF; @@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * The cpus in the mask must all be on the apic cluster. 
If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(*cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -170,49 +170,6 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = 0xFF; - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return (int) 0xFF; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } -exit: - free_cpumask_var(cpumask); - return apicid; -} - /* cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. 
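The apic.h conversions up to this point all follow the same shape: target_cpus(), cpu_mask_to_apicid() and friends go back to taking or returning a plain cpumask_t instead of a const struct cpumask pointer, nr_cpu_ids bounds checks revert to NR_CPUS, and the cpu_mask_to_apicid_and()/send_IPI_mask_allbutself() helpers are dropped. The cost being traded is stack traffic: a cpumask_t is a fixed NR_CPUS-bit array, so passing one by value copies the whole bitmap on every call. A self-contained sketch of the two calling conventions (illustrative names, not the kernel's):

	#define NR_CPUS 4096		/* a MAXSMP-sized build */

	typedef struct {
		unsigned long bits[NR_CPUS / (8 * sizeof(unsigned long))];
	} cpumask_t;			/* 512 bytes when NR_CPUS is 4096 */

	/* By value, as restored throughout this patch: the caller copies
	 * all NR_CPUS bits onto the stack for every call. */
	static unsigned int mask_to_apicid_byval(cpumask_t mask)
	{
		return (unsigned int)mask.bits[0];
	}

	/* By pointer, as removed by this patch: only a pointer is passed,
	 * however large NR_CPUS grows. */
	static unsigned int mask_to_apicid_byref(const cpumask_t *mask)
	{
		return (unsigned int)mask->bits[0];
	}

At NR_CPUS=128 the copy is 16 bytes and harmless; at 4096 it is half a kilobyte per call, which is presumably why the Kconfig hunk earlier in the patch marks MAXSMP as BROKEN while the by-value interfaces are in use.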
diff --git a/trunk/arch/x86/include/asm/summit/ipi.h b/trunk/arch/x86/include/asm/summit/ipi.h index a8a2c24f50cc..53bd1e7bd7b4 100644 --- a/trunk/arch/x86/include/asm/summit/ipi.h +++ b/trunk/arch/x86/include/asm/summit/ipi.h @@ -1,10 +1,9 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -15,12 +14,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/trunk/arch/x86/include/asm/topology.h b/trunk/arch/x86/include/asm/topology.h index 4e2f2e0aab27..168203c0c316 100644 --- a/trunk/arch/x86/include/asm/topology.h +++ b/trunk/arch/x86/include/asm/topology.h @@ -238,8 +238,6 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) /* indicates that pointers to the topology cpumask_t maps are valid */ #define arch_provides_topology_pointers yes diff --git a/trunk/arch/x86/kernel/apic.c b/trunk/arch/x86/kernel/apic.c index 6b7f824db160..6107b41da9a5 100644 --- a/trunk/arch/x86/kernel/apic.c +++ b/trunk/arch/x86/kernel/apic.c @@ -119,6 +119,8 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); int first_system_vector = 0xfe; +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Debug level, exported for io_apic.c */ @@ -140,7 +142,7 @@ static int lapic_next_event(unsigned long delta, struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); -static void lapic_timer_broadcast(const cpumask_t *mask); +static void lapic_timer_broadcast(const struct cpumask *mask); static void apic_pm_activate(void); /* @@ -453,10 +455,10 @@ static void lapic_timer_setup(enum clock_event_mode mode, /* * Local APIC timer broadcast function */ -static void lapic_timer_broadcast(const cpumask_t *mask) +static void lapic_timer_broadcast(const struct cpumask *mask) { #ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + send_IPI_mask(*mask, LOCAL_TIMER_VECTOR); #endif } @@ -1805,32 +1807,28 @@ void disconnect_bsp_APIC(int virt_wire_setup) void __cpuinit generic_processor_info(int apicid, int version) { int cpu; + cpumask_t tmp_map; /* * Validate version */ if (version == 0x0) { pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); + "fixing up to 0x10. (tell your hw vendor)\n", + version); version = 0x10; } apic_version[apicid] = version; - if (num_processors >= nr_cpu_ids) { - int max = nr_cpu_ids; - int thiscpu = max + disabled_cpus; - - pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." 
- " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; + if (num_processors >= NR_CPUS) { + pr_warning("WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); return; } num_processors++; - cpu = cpumask_next_zero(-1, cpu_present_mask); + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { @@ -1880,8 +1878,8 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif - set_cpu_possible(cpu, true); - set_cpu_present(cpu, true); + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); } #ifdef CONFIG_X86_64 @@ -2083,7 +2081,7 @@ __cpuinit int apic_is_clustered_box(void) bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { /* are we being called early in kernel startup? */ if (bios_cpu_apicid) { id = bios_cpu_apicid[i]; diff --git a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c index 48533d77be78..15cf14e9bf26 100644 --- a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -534,16 +534,31 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) per_cpu(cpuid4_info, cpu) = NULL; } -static void __cpuinit get_cpu_leaves(void *_retval) +static int __cpuinit detect_cache_attributes(unsigned int cpu) { - int j, *retval = _retval, cpu = smp_processor_id(); + struct _cpuid4_info *this_leaf; + unsigned long j; + int retval; + cpumask_t oldmask; + + if (num_cache_leaves == 0) + return -ENOENT; + + per_cpu(cpuid4_info, cpu) = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (per_cpu(cpuid4_info, cpu) == NULL) + return -ENOMEM; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + if (retval) + goto out; /* Do cpuid and store the results */ for (j = 0; j < num_cache_leaves; j++) { - struct _cpuid4_info *this_leaf; this_leaf = CPUID4_INFO_IDX(cpu, j); - *retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(*retval < 0)) { + retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(retval < 0)) { int i; for (i = 0; i < j; i++) @@ -552,21 +567,9 @@ static void __cpuinit get_cpu_leaves(void *_retval) } cache_shared_cpu_map_setup(cpu, j); } -} - -static int __cpuinit detect_cache_attributes(unsigned int cpu) -{ - int retval; - - if (num_cache_leaves == 0) - return -ENOENT; - - per_cpu(cpuid4_info, cpu) = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(cpuid4_info, cpu) == NULL) - return -ENOMEM; + set_cpus_allowed_ptr(current, &oldmask); - smp_call_function_single(cpu, get_cpu_leaves, &retval, true); +out: if (retval) { kfree(per_cpu(cpuid4_info, cpu)); per_cpu(cpuid4_info, cpu) = NULL; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index a5a5e0530370..748c8f9e7a05 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -83,41 +83,34 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ * CPU Initialization */ -struct thresh_restart { - struct threshold_block *b; - int reset; - u16 old_limit; -}; - /* must be called with correct cpu affinity */ -static long threshold_restart_bank(void *_tr) +static void threshold_restart_bank(struct threshold_block *b, + int reset, 
u16 old_limit) { - struct thresh_restart *tr = _tr; u32 mci_misc_hi, mci_misc_lo; - rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + rdmsr(b->address, mci_misc_lo, mci_misc_hi); - if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ + if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + reset = 1; /* limit cannot be lower than err count */ - if (tr->reset) { /* reset err count and overflow bit */ + if (reset) { /* reset err count and overflow bit */ mci_misc_hi = (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); - } else if (tr->old_limit) { /* change limit w/o reset */ + (THRESHOLD_MAX - b->threshold_limit); + } else if (old_limit) { /* change limit w/o reset */ int new_count = (mci_misc_hi & THRESHOLD_MAX) + - (tr->old_limit - tr->b->threshold_limit); + (old_limit - b->threshold_limit); mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } - tr->b->interrupt_enable ? + b->interrupt_enable ? (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : (mci_misc_hi &= ~MASK_INT_TYPE_HI); mci_misc_hi |= MASK_COUNT_EN_HI; - wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); - return 0; + wrmsr(b->address, mci_misc_lo, mci_misc_hi); } /* cpu init entry point, called from mce.c with preempt off */ @@ -127,7 +120,6 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u8 lvt_off; u32 low = 0, high = 0, address = 0; - struct thresh_restart tr; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -170,10 +162,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) wrmsr(address, low, high); threshold_defaults.address = address; - tr.b = &threshold_defaults; - tr.reset = 0; - tr.old_limit = 0; - threshold_restart_bank(&tr); + threshold_restart_bank(&threshold_defaults, 0, 0); } } } @@ -262,6 +251,20 @@ struct threshold_attr { ssize_t(*store) (struct threshold_block *, const char *, size_t count); }; +static void affinity_set(unsigned int cpu, cpumask_t *oldmask, + cpumask_t *newmask) +{ + *oldmask = current->cpus_allowed; + cpus_clear(*newmask); + cpu_set(cpu, *newmask); + set_cpus_allowed_ptr(current, newmask); +} + +static void affinity_restore(const cpumask_t *oldmask) +{ + set_cpus_allowed_ptr(current, oldmask); +} + #define SHOW_FIELDS(name) \ static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ { \ @@ -274,16 +277,15 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { char *end; - struct thresh_restart tr; + cpumask_t oldmask, newmask; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; b->interrupt_enable = !!new; - tr.b = b; - tr.reset = 0; - tr.old_limit = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 0, 0); + affinity_restore(&oldmask); return end - buf; } @@ -292,7 +294,8 @@ static ssize_t store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { char *end; - struct thresh_restart tr; + cpumask_t oldmask, newmask; + u16 old; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; @@ -300,36 +303,34 @@ static ssize_t store_threshold_limit(struct threshold_block *b, new = THRESHOLD_MAX; if (new < 1) new = 1; - tr.old_limit = b->threshold_limit; + old = b->threshold_limit; b->threshold_limit = new; - tr.b = b; - 
tr.reset = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 0, old); + affinity_restore(&oldmask); return end - buf; } -static long local_error_count(void *_b) -{ - struct threshold_block *b = _b; - u32 low, high; - - rdmsr(b->address, low, high); - return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); -} - static ssize_t show_error_count(struct threshold_block *b, char *buf) { - return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); + u32 high, low; + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); + rdmsr(b->address, low, high); + affinity_restore(&oldmask); + return sprintf(buf, "%x\n", + (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); } static ssize_t store_error_count(struct threshold_block *b, const char *buf, size_t count) { - struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; - - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 1, 0); + affinity_restore(&oldmask); return 1; } @@ -462,19 +463,12 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, return err; } -static long local_allocate_threshold_blocks(void *_bank) -{ - unsigned int *bank = _bank; - - return allocate_threshold_blocks(smp_processor_id(), *bank, 0, - MSR_IA32_MC0_MISC + *bank * 4); -} - /* symlinks sibling shared banks to first core. first core owns dir/files. */ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; + cpumask_t oldmask, newmask; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -525,7 +519,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); + affinity_set(cpu, &oldmask, &newmask); + err = allocate_threshold_blocks(cpu, bank, 0, + MSR_IA32_MC0_MISC + bank * 4); + affinity_restore(&oldmask); + if (err) goto out_free; diff --git a/trunk/arch/x86/kernel/genapic_flat_64.c b/trunk/arch/x86/kernel/genapic_flat_64.c index 34185488e4fb..c0262791bda4 100644 --- a/trunk/arch/x86/kernel/genapic_flat_64.c +++ b/trunk/arch/x86/kernel/genapic_flat_64.c @@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static const struct cpumask *flat_target_cpus(void) +static cpumask_t flat_target_cpus(void) { - return cpu_online_mask; + return cpu_online_map; } -static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t flat_vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -45,8 +45,8 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. 
*/ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } /* @@ -69,8 +69,9 @@ static void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void _flat_send_IPI_mask(unsigned long mask, int vector) +static void flat_send_IPI_mask(cpumask_t cpumask, int vector) { + unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); @@ -78,41 +79,20 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) local_irq_restore(flags); } -static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - - _flat_send_IPI_mask(mask, vector); -} - -static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - int cpu = smp_processor_id(); - - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); - _flat_send_IPI_mask(mask, vector); -} - static void flat_send_IPI_allbutself(int vector) { - int cpu = smp_processor_id(); #ifdef CONFIG_HOTPLUG_CPU int hotplug = 1; #else int hotplug = 0; #endif if (hotplug || vector == NMI_VECTOR) { - if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { - unsigned long mask = cpumask_bits(cpu_online_mask)[0]; + cpumask_t allbutme = cpu_online_map; - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); + cpu_clear(smp_processor_id(), allbutme); - _flat_send_IPI_mask(mask, vector); - } + if (!cpus_empty(allbutme)) + flat_send_IPI_mask(allbutme, vector); } else if (num_online_cpus() > 1) { __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); } @@ -121,7 +101,7 @@ static void flat_send_IPI_allbutself(int vector) static void flat_send_IPI_all(int vector) { if (vector == NMI_VECTOR) - flat_send_IPI_mask(cpu_online_mask, vector); + flat_send_IPI_mask(cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } @@ -155,18 +135,9 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; -} - -static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) { - unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; - unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; - - return mask1 & mask2; + return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; } static unsigned int phys_pkg_id(int index_msb) @@ -186,10 +157,8 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, @@ -219,39 +188,35 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static const struct cpumask *physflat_target_cpus(void) +static cpumask_t physflat_target_cpus(void) { - return cpu_online_mask; + return cpu_online_map; } -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t physflat_vector_allocation_domain(int cpu) { - 
cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + return cpumask_of_cpu(cpu); } -static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) +static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); } -static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - send_IPI_mask_allbutself(cpumask, vector); -} - static void physflat_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t allbutme = cpu_online_map; + + cpu_clear(smp_processor_id(), allbutme); + physflat_send_IPI_mask(allbutme, vector); } static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_mask, vector); + physflat_send_IPI_mask(cpu_online_map, vector); } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -259,31 +224,13 @@ static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - struct genapic apic_physflat = { .name = "physical flat", .acpi_madt_oem_check = physflat_acpi_madt_oem_check, @@ -296,10 +243,8 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_cluster.c b/trunk/arch/x86/kernel/genx2apic_cluster.c index 6ce497cc372d..f6a2c8eb48a6 100644 --- a/trunk/arch/x86/kernel/genx2apic_cluster.c +++ b/trunk/arch/x86/kernel/genx2apic_cluster.c @@ -22,18 +22,19 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *x2apic_target_cpus(void) +static cpumask_t x2apic_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } /* * for now each logical cpu is in its own vector allocation domain. */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t x2apic_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -55,53 +56,32 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, * at once. We have 16 cpu's in a cluster. This will minimize IPI register * writes. 
*/ -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); -} - -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } local_irq_restore(flags); } static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + x2apic_send_IPI_mask(cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -109,38 +89,21 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. + * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. 
- */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -187,10 +150,8 @@ struct genapic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_phys.c b/trunk/arch/x86/kernel/genx2apic_phys.c index 62895cf315ff..d042211768b7 100644 --- a/trunk/arch/x86/kernel/genx2apic_phys.c +++ b/trunk/arch/x86/kernel/genx2apic_phys.c @@ -29,15 +29,16 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *x2apic_target_cpus(void) +static cpumask_t x2apic_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t x2apic_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -53,54 +54,32 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, x2apic_icr_write(cfg, apicid); } -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu_mask(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + x2apic_send_IPI_mask(cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -108,7 +87,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -116,30 +95,13 @@ static unsigned int 
x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -183,10 +145,8 @@ struct genapic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_uv_x.c b/trunk/arch/x86/kernel/genx2apic_uv_x.c index b193e082f6ce..dece17289731 100644 --- a/trunk/arch/x86/kernel/genx2apic_uv_x.c +++ b/trunk/arch/x86/kernel/genx2apic_uv_x.c @@ -79,15 +79,16 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *uv_target_cpus(void) +static cpumask_t uv_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t uv_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) @@ -126,37 +127,28 @@ static void uv_send_IPI_one(int cpu, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -static void uv_send_IPI_mask(const struct cpumask *mask, int vector) +static void uv_send_IPI_mask(cpumask_t mask, int vector) { unsigned int cpu; - for_each_cpu(cpu, mask) - uv_send_IPI_one(cpu, vector); -} - -static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned int cpu; - unsigned int this_cpu = smp_processor_id(); - - for_each_cpu(cpu, mask) - if (cpu != this_cpu) + for_each_possible_cpu(cpu) + if (cpu_isset(cpu, mask)) uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_allbutself(int vector) { - unsigned int cpu; - unsigned int this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - for_each_online_cpu(cpu) - if (cpu != this_cpu) - uv_send_IPI_one(cpu, vector); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + uv_send_IPI_mask(mask, vector); } static void uv_send_IPI_all(int vector) { - uv_send_IPI_mask(cpu_online_mask, vector); + uv_send_IPI_mask(cpu_online_map, vector); } static int uv_apic_id_registered(void) @@ -168,7 +160,7 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -176,30 +168,13 @@ static unsigned int 
uv_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -247,10 +222,8 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, - .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/io_apic.c b/trunk/arch/x86/kernel/io_apic.c index 3e070bb961d7..e7745961ed31 100644 --- a/trunk/arch/x86/kernel/io_apic.c +++ b/trunk/arch/x86/kernel/io_apic.c @@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) struct irq_cfg { struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; + cpumask_t domain; + cpumask_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; @@ -152,22 +152,22 @@ static struct irq_cfg irq_cfgx[] = { #else static struct irq_cfg irq_cfgx[NR_IRQS] = { #endif - [0] = { .vector = IRQ0_VECTOR, }, - [1] = { .vector = IRQ1_VECTOR, }, - [2] = { .vector = IRQ2_VECTOR, }, - [3] = { .vector = IRQ3_VECTOR, }, - [4] = { .vector = IRQ4_VECTOR, }, - [5] = { .vector = IRQ5_VECTOR, }, - [6] = { .vector = IRQ6_VECTOR, }, - [7] = { .vector = IRQ7_VECTOR, }, - [8] = { .vector = IRQ8_VECTOR, }, - [9] = { .vector = IRQ9_VECTOR, }, - [10] = { .vector = IRQ10_VECTOR, }, - [11] = { .vector = IRQ11_VECTOR, }, - [12] = { .vector = IRQ12_VECTOR, }, - [13] = { .vector = IRQ13_VECTOR, }, - [14] = { .vector = IRQ14_VECTOR, }, - [15] = { .vector = IRQ15_VECTOR, }, + [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; void __init arch_early_irq_init(void) @@ -183,10 +183,6 @@ void __init arch_early_irq_init(void) for (i = 0; i < count; i++) { desc = 
irq_to_desc(i); desc->chip_data = &cfg[i]; - alloc_bootmem_cpumask_var(&cfg[i].domain); - alloc_bootmem_cpumask_var(&cfg[i].old_domain); - if (i < NR_IRQS_LEGACY) - cpumask_setall(cfg[i].domain); } } @@ -211,20 +207,6 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); - if (cfg) { - /* FIXME: needs alloc_cpumask_var_node() */ - if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) { - kfree(cfg); - cfg = NULL; - } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) { - free_cpumask_var(cfg->domain); - kfree(cfg); - cfg = NULL; - } else { - cpumask_clear(cfg->domain); - cpumask_clear(cfg->old_domain); - } - } printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); return cfg; @@ -347,14 +329,13 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) } } -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg = desc->chip_data; if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) + if (!cpus_intersects(desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -369,8 +350,7 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) +static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { } #endif @@ -501,26 +481,6 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; @@ -556,61 +516,48 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); -/* - * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid - * of that, or returns BAD_APICID and leaves desc->affinity untouched. 
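[Note: with the set_desc_affinity() helper removed here, each set_*_affinity handler in this file re-grows the same preamble. The recurring sequence, condensed from the hunks that follow (cfg, desc, irq and mask as in those hunks; the MSI variants differ only in pointer-vs-value plumbing):

        cpumask_t tmp;

        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
                return;                         /* nothing online in the mask */

        if (assign_irq_vector(irq, cfg, mask))
                return;                         /* no free vector */

        set_extra_move_desc(desc, mask);

        cpus_and(tmp, cfg->domain, mask);       /* cpus the vector actually covers */
        dest = cpu_mask_to_apicid(tmp);         /* destination to program */
]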
- */ -static unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; irq = desc->irq; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; + return; - cpumask_and(&desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); -} -static void -set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, cfg); - } + __target_IO_APIC_irq(irq, dest, cfg); + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } -static void -set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) +static void set_ioapic_affinity_irq(unsigned int irq, + const struct cpumask *mask) { struct irq_desc *desc; desc = irq_to_desc(irq); - set_ioapic_affinity_irq_desc(desc, mask); + set_ioapic_affinity_irq_desc(desc, *mask); } #endif /* CONFIG_SMP */ @@ -1272,8 +1219,7 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1288,49 +1234,49 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) */ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; - int cpu, err; - cpumask_var_t tmp_mask; + int cpu; if ((cfg->move_in_progress) || cfg->move_cleanup_count) return -EBUSY; - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); old_vector = cfg->vector; if (old_vector) { - cpumask_and(tmp_mask, mask, cpu_online_mask); - cpumask_and(tmp_mask, cfg->domain, tmp_mask); - if (!cpumask_empty(tmp_mask)) { - free_cpumask_var(tmp_mask); + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) return 0; - } } - /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; - for_each_cpu_and(cpu, mask, cpu_online_mask) { + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; int new_cpu; int vector, offset; - vector_allocation_domain(cpu, tmp_mask); + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); vector = current_vector; offset = current_offset; next: vector += 8; if (vector >= first_system_vector) { - /* If out of vectors on large boxen, must share them. */ + /* If we run out of vectors on large boxen, must share them. 
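[Note: the allocation loop in this hunk probes vectors with a stride of 8 and rotates the starting offset when it reaches first_system_vector, so a second pass lands on the slots the first pass skipped. The probe-order logic in isolation; base and limit are stand-ins for FIRST_DEVICE_VECTOR and first_system_vector:

        /* One step of the search: stride 8, rotate offset on wrap. */
        static int next_vector(int vector, int *offset, int base, int limit)
        {
                vector += 8;
                if (vector >= limit) {
                        *offset = (*offset + 1) % 8;
                        vector = base + *offset;
                }
                return vector;
        }

The caller gives up once the walk returns to current_vector, i.e. after every offset has been tried.]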
*/ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; - - if (test_bit(vector, used_vectors)) +#ifdef CONFIG_X86_64 + if (vector == IA32_SYSCALL_VECTOR) goto next; - - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif + for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! */ @@ -1338,21 +1284,18 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) current_offset = offset; if (old_vector) { cfg->move_in_progress = 1; - cpumask_copy(cfg->old_domain, cfg->domain); + cfg->old_domain = cfg->domain; } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) + for_each_cpu_mask_nr(new_cpu, new_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); - err = 0; - break; + cfg->domain = domain; + return 0; } - free_cpumask_var(tmp_mask); - return err; + return -ENOSPC; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { int err; unsigned long flags; @@ -1365,20 +1308,23 @@ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { + cpumask_t mask; int cpu, vector; BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; - cpumask_clear(cfg->domain); + cpus_clear(cfg->domain); if (likely(!cfg->move_in_progress)) return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + cpus_and(mask, cfg->old_domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -1403,7 +1349,7 @@ void __setup_vector_irq(int cpu) if (!desc) continue; cfg = desc->chip_data; - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; @@ -1415,7 +1361,7 @@ void __setup_vector_irq(int cpu) continue; cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -1551,17 +1497,18 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de { struct irq_cfg *cfg; struct IO_APIC_route_entry entry; - unsigned int dest; + cpumask_t mask; if (!IO_APIC_IRQ(irq)) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, TARGET_CPUS)) + mask = TARGET_CPUS; + if (assign_irq_vector(irq, cfg, mask)) return; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(mask, cfg->domain, mask); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1571,7 +1518,8 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - dest, trigger, polarity, cfg->vector)) { + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); __clear_irq_vector(irq, cfg); @@ -2293,7 +2241,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; 
spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2342,17 +2290,18 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. */ -static void -migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; + cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; irq = desc->irq; @@ -2365,7 +2314,8 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) set_extra_move_desc(desc, mask); - dest = cpu_mask_to_apicid_and(cfg->domain, mask); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -2382,10 +2332,14 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) */ modify_irte(irq, &irte); - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } - cpumask_copy(&desc->affinity, mask); + desc->affinity = mask; } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2407,11 +2361,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpus_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2448,12 +2402,11 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. 
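[Note: from here on, every site that used the removed send_cleanup_vector() helper open-codes the same epilogue; it reappears in irq_complete_move() and ir_set_msi_irq_affinity() below. The shared shape:

        if (cfg->move_in_progress) {
                cpumask_t cleanup_mask;

                /* Tell every cpu still holding the old vector to release it. */
                cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
                cfg->move_cleanup_count = cpus_weight(cleanup_mask);
                send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
                cfg->move_in_progress = 0;
        }
]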
*/ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, - const struct cpumask *mask) +static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); + desc->pending_mask = mask; migrate_irq_remapped_level_desc(desc); return; } @@ -2465,7 +2418,7 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, { struct irq_desc *desc = irq_to_desc(irq); - set_ir_ioapic_affinity_irq_desc(desc, mask); + set_ir_ioapic_affinity_irq_desc(desc, *mask); } #endif @@ -2496,7 +2449,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (!cfg->move_cleanup_count) goto unlock; - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) goto unlock; __get_cpu_var(vector_irq)[vector] = -1; @@ -2533,14 +2486,20 @@ static void irq_complete_move(struct irq_desc **descp) vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; #endif - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } } #else static inline void irq_complete_move(struct irq_desc **descp) {} @@ -3265,13 +3224,16 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms struct irq_cfg *cfg; int err; unsigned dest; + cpumask_t tmp; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, cfg, tmp); if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3331,12 +3293,19 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (!cpumask_intersects(mask, cpu_online_mask)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, *mask)) + return; + + set_extra_move_desc(desc, *mask); + + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); read_msi_msg_desc(desc, &msg); @@ -3346,27 +3315,37 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); + cpumask_copy(&desc->affinity, mask); } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. 
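[Note: for the non-remapped MSI path just shown, retargeting boils down to rewriting two fields of the cached message and pushing it back to the device. Roughly, with the mask constants as I read them from asm/msidef.h:

        read_msi_msg_desc(desc, &msg);

        msg.data &= ~MSI_DATA_VECTOR_MASK;          /* new vector */
        msg.data |= MSI_DATA_VECTOR(cfg->vector);
        msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;   /* new destination apicid */
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);

        write_msi_msg_desc(desc, &msg);

The interrupt-remapped variant that follows never touches the message itself: it only updates irte.vector and irte.dest_id and calls modify_irte(), which is intended to make the migration atomic from the device's point of view.]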
*/ -static void -ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void ir_set_msi_irq_affinity(unsigned int irq, + const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg = desc->chip_data; + struct irq_cfg *cfg; unsigned int dest; + cpumask_t tmp, cleanup_mask; struct irte irte; + if (!cpumask_intersects(mask, cpu_online_mask)) + return; + if (get_irte(irq, &irte)) return; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, *mask)) return; + set_extra_move_desc(desc, *mask); + + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3380,8 +3359,14 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) * at the new destination. So, time to cleanup the previous * vector allocation. */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + cpumask_copy(&desc->affinity, mask); } #endif @@ -3578,12 +3563,19 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (!cpumask_intersects(mask, cpu_online_mask)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, *mask)) + return; + + set_extra_move_desc(desc, *mask); + + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); dmar_msi_read(irq, &msg); @@ -3593,6 +3585,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + cpumask_copy(&desc->affinity, mask); } #endif /* CONFIG_SMP */ @@ -3632,12 +3625,19 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (!cpumask_intersects(mask, cpu_online_mask)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, *mask)) + return; + + set_extra_move_desc(desc, *mask); + + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); hpet_msi_read(irq, &msg); @@ -3647,6 +3647,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + cpumask_copy(&desc->affinity, mask); } #endif /* CONFIG_SMP */ @@ -3706,14 +3707,22 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (!cpumask_intersects(mask, cpu_online_mask)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, *mask)) + return; + + set_extra_move_desc(desc, *mask); + + cpumask_and(&tmp, &cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); + cpumask_copy(&desc->affinity, mask); } #endif @@ -3733,14 +3742,17 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; int err; + cpumask_t tmp; cfg = irq_cfg(irq); - err = 
assign_irq_vector(irq, cfg, TARGET_CPUS); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, cfg, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3776,7 +3788,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset) { - const struct cpumask *eligible_cpu = cpumask_of(cpu); + const cpumask_t *eligible_cpu = get_cpu_mask(cpu); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3786,7 +3798,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, eligible_cpu); + err = assign_irq_vector(irq, cfg, *eligible_cpu); if (err != 0) return err; @@ -3805,7 +3817,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(eligible_cpu); + entry->dest = cpu_mask_to_apicid(*eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -4016,7 +4028,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; struct irq_desc *desc; struct irq_cfg *cfg; - const struct cpumask *mask; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -4047,7 +4059,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; diff --git a/trunk/arch/x86/kernel/ipi.c b/trunk/arch/x86/kernel/ipi.c index 285bbf8831fa..f1c688e46f35 100644 --- a/trunk/arch/x86/kernel/ipi.c +++ b/trunk/arch/x86/kernel/ipi.c @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. 
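[Note: the two senders in this file trade generality for speed. send_IPI_mask_bitmask() writes the whole mask into one ICR destination field, which only works while every target fits the 8-bit flat logical destination (flat logical mode addresses at most 8 cpus, hence the "smaller machines" remark); send_IPI_mask_sequence() sends one IPI per cpu and has no such limit. Condensed:

        /* One shot: low word of the mask is the logical destination. */
        unsigned long mask = cpus_addr(cpumask)[0];
        __send_IPI_dest_field(mask, vector);

        /* One IPI per cpu: works for any apic mode or cpu count. */
        for_each_possible_cpu(query_cpu)
                if (cpu_isset(query_cpu, cpumask))
                        __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
                                              vector);
]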
*/ -void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) { - unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); __send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(const struct cpumask *mask, int vector) +void send_IPI_mask_sequence(cpumask_t mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -139,24 +139,12 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); - local_irq_restore(flags); -} - -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) + for_each_possible_cpu(query_cpu) { + if (cpu_isset(query_cpu, mask)) { __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + } + } local_irq_restore(flags); } diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index bce53e1352a0..3f1d9d18df67 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -9,7 +9,6 @@ #include #include #include -#include atomic_t irq_err_count; @@ -191,5 +190,3 @@ u64 arch_irq_stat(void) #endif return sum; } - -EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/trunk/arch/x86/kernel/irq_32.c b/trunk/arch/x86/kernel/irq_32.c index 9dc5588f336a..9cf9cbbf7a02 100644 --- a/trunk/arch/x86/kernel/irq_32.c +++ b/trunk/arch/x86/kernel/irq_32.c @@ -233,28 +233,27 @@ unsigned int do_IRQ(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU #include -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) +void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; + cpumask_t mask; if (!desc) continue; if (irq == 2) continue; - affinity = &desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + cpus_and(mask, desc->affinity, map); + if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; + mask = map; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); + desc->chip->set_affinity(irq, &mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/trunk/arch/x86/kernel/irq_64.c b/trunk/arch/x86/kernel/irq_64.c index 6383d50f82ea..54c69d47a771 100644 --- a/trunk/arch/x86/kernel/irq_64.c +++ b/trunk/arch/x86/kernel/irq_64.c @@ -80,17 +80,16 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. 
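[Note: fixup_irqs() reverts from reading cpu_online_mask itself to taking the survivor map from its caller. The hotplug call site, per the smpboot.c hunk further down, becomes:

        /* cpu is going away: drop it from the maps, then re-home irqs. */
        lock_vector_lock();
        remove_cpu_from_maps(cpu);
        unlock_vector_lock();
        fixup_irqs(cpu_online_map);     /* passed by value, post-removal */

Any irq whose affinity no longer intersects the map has its affinity broken to the full map, with the "Breaking affinity for irq %i" warning.]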
*/ -void fixup_irqs(void) +void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { + cpumask_t mask; int break_affinity = 0; int set_affinity = 1; - const struct cpumask *affinity; if (!desc) continue; @@ -100,23 +99,23 @@ void fixup_irqs(void) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { + cpus_equal(desc->affinity, map)) { spin_unlock(&desc->lock); continue; } - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + cpus_and(mask, desc->affinity, map); + if (cpus_empty(mask)) { break_affinity = 1; - affinity = cpu_all_mask; + mask = map; } if (desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); + desc->chip->set_affinity(irq, &mask); else if (!(warned++)) set_affinity = 0; diff --git a/trunk/arch/x86/kernel/irqinit_32.c b/trunk/arch/x86/kernel/irqinit_32.c index 84723295f88a..203384ed2b5d 100644 --- a/trunk/arch/x86/kernel/irqinit_32.c +++ b/trunk/arch/x86/kernel/irqinit_32.c @@ -110,18 +110,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -158,12 +146,10 @@ void __init native_init_IRQ(void) alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* IPI for single call function */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/trunk/arch/x86/kernel/irqinit_64.c b/trunk/arch/x86/kernel/irqinit_64.c index 31ebfe38e96c..6190e6ef546c 100644 --- a/trunk/arch/x86/kernel/irqinit_64.c +++ b/trunk/arch/x86/kernel/irqinit_64.c @@ -69,18 +69,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... 
NR_VECTORS - 1] = -1 }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - void __init init_ISA_irqs(void) { int i; @@ -133,7 +121,6 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif } diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index ba7b9a0e6063..61f718df6eec 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -592,7 +592,10 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - send_IPI_allbutself(NMI_VECTOR); + cpumask_t mask = cpu_online_map; + cpu_clear(safe_smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/trunk/arch/x86/kernel/setup_percpu.c b/trunk/arch/x86/kernel/setup_percpu.c index 49f3f709ee1f..8e8b1193add5 100644 --- a/trunk/arch/x86/kernel/setup_percpu.c +++ b/trunk/arch/x86/kernel/setup_percpu.c @@ -152,11 +152,6 @@ void __init setup_per_cpu_areas(void) old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); size = roundup(old_size, align); - - printk(KERN_INFO - "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", - NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -173,24 +168,24 @@ void __init setup_per_cpu_areas(void) "cpu %d has no node %d or node-local memory\n", cpu, node); if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d at %016lx\n", + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, __pa(MAX_DMA_ADDRESS)); if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d on node%d " - "at %016lx\n", - cpu, node, __pa(ptr)); + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", + NR_CPUS, nr_cpu_ids, nr_node_ids); + /* Setup percpu data maps */ setup_per_cpu_maps(); diff --git a/trunk/arch/x86/kernel/smp.c b/trunk/arch/x86/kernel/smp.c index beea2649a240..7e558db362c1 100644 --- a/trunk/arch/x86/kernel/smp.c +++ b/trunk/arch/x86/kernel/smp.c @@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); } -void native_send_call_func_ipi(const struct cpumask *mask) +void native_send_call_func_ipi(cpumask_t mask) { cpumask_t allbutself; allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - if (cpus_equal(*mask, allbutself) && + if (cpus_equal(mask, allbutself) && cpus_equal(cpu_online_map, cpu_callout_map)) send_IPI_allbutself(CALL_FUNCTION_VECTOR); else diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index 9e177a4077ee..c5392058cd07 100644 --- a/trunk/arch/x86/kernel/smpboot.c 
+++ b/trunk/arch/x86/kernel/smpboot.c @@ -1259,15 +1259,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } -static int __initdata setup_possible_cpus = -1; -static int __init _setup_possible_cpus(char *str) -{ - get_option(&str, &setup_possible_cpus); - return 0; -} -early_param("possible_cpus", _setup_possible_cpus); - - /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -1280,7 +1271,7 @@ early_param("possible_cpus", _setup_possible_cpus); * * Three ways to find out the number of additional hotplug CPUs: * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM + * - The user can overwrite it with additional_cpus=NUM * - Otherwise don't reserve additional CPUs. * We do this because additional CPUs waste a lot of memory. * -AK @@ -1293,17 +1284,9 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; - if (setup_possible_cpus == -1) - possible = num_processors + disabled_cpus; - else - possible = setup_possible_cpus; - - if (possible > CONFIG_NR_CPUS) { - printk(KERN_WARNING - "%d Processors exceeds NR_CPUS limit of %d\n", - possible, CONFIG_NR_CPUS); - possible = CONFIG_NR_CPUS; - } + possible = num_processors + disabled_cpus; + if (possible > NR_CPUS) + possible = NR_CPUS; printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); @@ -1368,7 +1351,7 @@ void cpu_disable_common(void) lock_vector_lock(); remove_cpu_from_maps(cpu); unlock_vector_lock(); - fixup_irqs(); + fixup_irqs(cpu_online_map); } int native_cpu_disable(void) diff --git a/trunk/arch/x86/kernel/tlb_32.c b/trunk/arch/x86/kernel/tlb_32.c index ce5054642247..8da059f949be 100644 --- a/trunk/arch/x86/kernel/tlb_32.c +++ b/trunk/arch/x86/kernel/tlb_32.c @@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ diff --git a/trunk/arch/x86/kernel/tlb_64.c b/trunk/arch/x86/kernel/tlb_64.c index f8be6f1d2e48..29887d7081a9 100644 --- a/trunk/arch/x86/kernel/tlb_64.c +++ b/trunk/arch/x86/kernel/tlb_64.c @@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpus_empty(f->flush_cpumask)) cpu_relax(); diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index 2d1f4c7e4052..141907ab6e22 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -72,6 +72,9 @@ #include "cpu/mcheck/mce.h" +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? 
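[Note: with the possible_cpus= early param removed, prefill_possible_map() is back to purely BIOS-driven sizing: enabled plus disabled (hot-pluggable) local APICs, clamped to NR_CPUS. A worked example, assuming a box reporting 4 enabled and 12 disabled APICs on an NR_CPUS=8 kernel:

        possible = num_processors + disabled_cpus;  /* 4 + 12 = 16 */
        if (possible > NR_CPUS)
                possible = NR_CPUS;                 /* clamped to 8 */

        /* printk: "SMP: Allowing 8 CPUs, 4 hotplug CPUs" */
]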
*/ @@ -86,9 +89,6 @@ gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; #endif -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - static int ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) @@ -941,7 +941,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { +#ifdef CONFIG_X86_32 int i; +#endif #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); @@ -998,15 +1000,11 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); -#endif /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); -#ifdef CONFIG_X86_64 - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#else set_bit(SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/trunk/arch/x86/mach-generic/bigsmp.c b/trunk/arch/x86/mach-generic/bigsmp.c index bc4c7840b2a8..3624a364b7f3 100644 --- a/trunk/arch/x86/mach-generic/bigsmp.c +++ b/trunk/arch/x86/mach-generic/bigsmp.c @@ -42,10 +42,9 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + return cpumask_of_cpu(cpu); } static int probe_bigsmp(void) diff --git a/trunk/arch/x86/mach-generic/es7000.c b/trunk/arch/x86/mach-generic/es7000.c index 4ba5ccaa1584..7b4e6d0d1690 100644 --- a/trunk/arch/x86/mach-generic/es7000.c +++ b/trunk/arch/x86/mach-generic/es7000.c @@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -97,7 +97,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/trunk/arch/x86/mach-generic/numaq.c b/trunk/arch/x86/mach-generic/numaq.c index 511d7941364f..71a309b122e6 100644 --- a/trunk/arch/x86/mach-generic/numaq.c +++ b/trunk/arch/x86/mach-generic/numaq.c @@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -48,7 +48,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. 
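[Note: the broad domain these vector_allocation_domain() probes return is not free. As the io_apic.c hunk earlier restores, __assign_irq_vector() has to reserve the chosen vector on every online cpu in the domain:

        domain = vector_allocation_domain(cpu);     /* APIC_ALL_CPUS here */
        cpus_and(new_mask, domain, cpu_online_map);
        for_each_cpu_mask_nr(new_cpu, new_mask)
                per_cpu(vector_irq, new_cpu)[vector] = irq;

That is the defensive trade-off the comments in these hunks describe: spending vector space on all eight flat-mode cpus so a loose lowest-priority redirect can never land on a cpu that has no handler installed for the vector.]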
*/ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/trunk/arch/x86/mach-generic/summit.c b/trunk/arch/x86/mach-generic/summit.c index 2821ffc188b5..2c6d234e0009 100644 --- a/trunk/arch/x86/mach-generic/summit.c +++ b/trunk/arch/x86/mach-generic/summit.c @@ -24,7 +24,7 @@ static int probe_summit(void) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -34,7 +34,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/trunk/arch/x86/mach-voyager/voyager_smp.c b/trunk/arch/x86/mach-voyager/voyager_smp.c index a5bc05492b1e..9c990185e9f2 100644 --- a/trunk/arch/x86/mach-voyager/voyager_smp.c +++ b/trunk/arch/x86/mach-voyager/voyager_smp.c @@ -672,7 +672,7 @@ void __init smp_boot_cpus(void) /* loop over all the extended VIC CPUs and boot them. The * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) continue; do_boot_cpu(i); diff --git a/trunk/arch/x86/mm/numa_64.c b/trunk/arch/x86/mm/numa_64.c index 71a14f89f89e..cebcbf152d46 100644 --- a/trunk/arch/x86/mm/numa_64.c +++ b/trunk/arch/x86/mm/numa_64.c @@ -278,7 +278,7 @@ void __init numa_init_array(void) int rr, i; rr = first_node(node_online_map); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); @@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) memnodemap[0] = 0; node_set_online(0); node_set(0, node_possible_map); - for (i = 0; i < nr_cpu_ids; i++) + for (i = 0; i < NR_CPUS; i++) numa_set_node(i, 0); e820_register_active_regions(0, start_pfn, last_pfn); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); diff --git a/trunk/arch/x86/mm/srat_64.c b/trunk/arch/x86/mm/srat_64.c index 09737c8af074..51c0a2fc14fe 100644 --- a/trunk/arch/x86/mm/srat_64.c +++ b/trunk/arch/x86/mm/srat_64.c @@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (!node_online(i)) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { int node = early_cpu_to_node(i); if (node == NUMA_NO_NODE) diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c index 503c240e26c7..773d68d3e912 100644 --- a/trunk/arch/x86/xen/mmu.c +++ b/trunk/arch/x86/xen/mmu.c @@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info) static void xen_drop_mm_ref(struct mm_struct *mm) { - cpumask_var_t mask; + cpumask_t mask; unsigned cpu; if (current->active_mm == mm) { @@ -1094,16 +1094,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm) } /* Get the "official" set of cpus referring to our pagetable. 
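[Note: the reverted xen_drop_mm_ref() works on a stack copy of the mm's cpu set, then widens it with any vcpu whose cached cr3 still points at this pagetable, since a vcpu in lazy mode may hold a stale reference (per the comment in the hunk). Condensed:

        mask = mm->cpu_vm_mask;                     /* cpus known to use the mm */
        for_each_online_cpu(cpu)
                if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
                        cpu_set(cpu, mask);         /* stale lazy-mode reference */
        if (!cpus_empty(mask))
                smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
]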
*/ - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) - && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) - continue; - smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); - } - return; - } - cpumask_copy(mask, &mm->cpu_vm_mask); + mask = mm->cpu_vm_mask; /* It's possible that a vcpu may have a stale reference to our cr3, because its in lazy mode, and it hasn't yet flushed @@ -1112,12 +1103,11 @@ static void xen_drop_mm_ref(struct mm_struct *mm) if needed. */ for_each_online_cpu(cpu) { if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) - cpumask_set_cpu(cpu, mask); + cpu_set(cpu, mask); } - if (!cpumask_empty(mask)) - smp_call_function_many(mask, drop_other_mm_ref, mm, 1); - free_cpumask_var(mask); + if (!cpus_empty(mask)) + smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); } #else static void xen_drop_mm_ref(struct mm_struct *mm) diff --git a/trunk/arch/x86/xen/smp.c b/trunk/arch/x86/xen/smp.c index c44e2069c7c7..acd9b6705e02 100644 --- a/trunk/arch/x86/xen/smp.c +++ b/trunk/arch/x86/xen/smp.c @@ -33,7 +33,7 @@ #include "xen-ops.h" #include "mmu.h" -cpumask_var_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, callfunc_irq); @@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) { int i, rc; - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; @@ -192,14 +192,11 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) if (xen_smp_intr_init(0)) BUG(); - if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) - panic("could not allocate xen_cpu_initialized_map\n"); - - cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); + xen_cpu_initialized_map = cpumask_of_cpu(0); /* Restrict the possible_map according to max_cpus. */ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) + for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) continue; cpu_clear(cpu, cpu_possible_map); } @@ -224,7 +221,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct vcpu_guest_context *ctxt; struct desc_struct *gdt; - if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) + if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); @@ -411,23 +408,24 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(const struct cpumask *mask, - enum ipi_vector vector) +static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) { unsigned cpu; - for_each_cpu_and(cpu, mask, cpu_online_mask) + cpus_and(mask, mask, cpu_online_map); + + for_each_cpu_mask_nr(cpu, mask) xen_send_IPI_one(cpu, vector); } -static void xen_smp_send_call_function_ipi(const struct cpumask *mask) +static void xen_smp_send_call_function_ipi(cpumask_t mask) { int cpu; xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. 
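[Note: the yield in the hunk below reads as a latency aid rather than a correctness requirement: a cross-cpu call cannot complete until the target vcpu is actually scheduled, so if the hypervisor has preempted ("stolen") one of the targets, yielding our own timeslice gives it a chance to run and answer the IPI sooner. The restored loop:

        for_each_cpu_mask_nr(cpu, mask) {
                if (xen_vcpu_stolen(cpu)) {
                        HYPERVISOR_sched_op(SCHEDOP_yield, 0);
                        break;  /* one yield is enough of a hint */
                }
        }
]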
*/ - for_each_cpu(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { if (xen_vcpu_stolen(cpu)) { HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; @@ -437,8 +435,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(cpumask_of(cpu), - XEN_CALL_FUNCTION_SINGLE_VECTOR); + xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); } static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) diff --git a/trunk/arch/x86/xen/suspend.c b/trunk/arch/x86/xen/suspend.c index 212ffe012b76..2a234db5949b 100644 --- a/trunk/arch/x86/xen/suspend.c +++ b/trunk/arch/x86/xen/suspend.c @@ -35,8 +35,7 @@ void xen_post_suspend(int suspend_cancelled) pfn_to_mfn(xen_start_info->console.domU.mfn); } else { #ifdef CONFIG_SMP - BUG_ON(xen_cpu_initialized_map == NULL); - cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); + xen_cpu_initialized_map = cpu_online_map; #endif xen_vcpu_restore(); } diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h index c1f8faf0a2c5..9e1afae8461f 100644 --- a/trunk/arch/x86/xen/xen-ops.h +++ b/trunk/arch/x86/xen/xen-ops.h @@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void); __cpuinit void xen_init_lock_cpu(int cpu); void xen_uninit_lock_cpu(int cpu); -extern cpumask_var_t xen_cpu_initialized_map; +extern cpumask_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} #endif diff --git a/trunk/drivers/lguest/interrupts_and_traps.c b/trunk/drivers/lguest/interrupts_and_traps.c index 415fab0125ac..a1039068f95c 100644 --- a/trunk/drivers/lguest/interrupts_and_traps.c +++ b/trunk/drivers/lguest/interrupts_and_traps.c @@ -222,16 +222,11 @@ bool check_syscall_vector(struct lguest *lg) int init_interrupts(void) { /* If they want some strange system call vector, reserve it now */ - if (syscall_vector != SYSCALL_VECTOR) { - if (test_bit(syscall_vector, used_vectors) || - vector_used_by_percpu_irq(syscall_vector)) { - printk(KERN_ERR "lg: couldn't reserve syscall %u\n", - syscall_vector); - return -EBUSY; - } - set_bit(syscall_vector, used_vectors); + if (syscall_vector != SYSCALL_VECTOR + && test_and_set_bit(syscall_vector, used_vectors)) { + printk("lg: couldn't reserve syscall %u\n", syscall_vector); + return -EBUSY; } - return 0; } diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 158d53d07765..8395e715809d 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -250,7 +250,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(void); extern void task_rq_unlock_wait(struct task_struct *p); -extern cpumask_var_t nohz_cpu_mask; +extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); #else @@ -758,51 +758,20 @@ enum cpu_idle_type { #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ -enum powersavings_balance_level { - POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ - POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package - * first for long running threads - */ - POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle - * cpu package for power savings - */ - MAX_POWERSAVINGS_BALANCE_LEVELS -}; +#define BALANCE_FOR_MC_POWER \ + (sched_smt_power_savings ? 
SD_POWERSAVINGS_BALANCE : 0) -extern int sched_mc_power_savings, sched_smt_power_savings; +#define BALANCE_FOR_PKG_POWER \ + ((sched_mc_power_savings || sched_smt_power_savings) ? \ + SD_POWERSAVINGS_BALANCE : 0) -static inline int sd_balance_for_mc_power(void) -{ - if (sched_smt_power_savings) - return SD_POWERSAVINGS_BALANCE; +#define test_sd_parent(sd, flag) ((sd->parent && \ + (sd->parent->flags & flag)) ? 1 : 0) - return 0; -} - -static inline int sd_balance_for_package_power(void) -{ - if (sched_mc_power_savings | sched_smt_power_savings) - return SD_POWERSAVINGS_BALANCE; - - return 0; -} - -/* - * Optimise SD flags for power savings: - * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings. - * Keep default SD flags if sched_{smt,mc}_power_saving=0 - */ - -static inline int sd_power_saving_flags(void) -{ - if (sched_mc_power_savings | sched_smt_power_savings) - return SD_BALANCE_NEWIDLE; - - return 0; -} struct sched_group { struct sched_group *next; /* Must be a circular list */ + cpumask_t cpumask; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -815,15 +784,8 @@ struct sched_group { * (see include/linux/reciprocal_div.h) */ u32 reciprocal_cpu_power; - - unsigned long cpumask[]; }; -static inline struct cpumask *sched_group_cpus(struct sched_group *sg) -{ - return to_cpumask(sg->cpumask); -} - enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, @@ -847,6 +809,7 @@ struct sched_domain { struct sched_domain *parent; /* top domain must be null terminated */ struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ + cpumask_t span; /* span of all CPUs in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ @@ -901,35 +864,18 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif - - /* span of all CPUs in this domain */ - unsigned long span[]; }; -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); -/* Test a flag in parent sched domain */ -static inline int test_sd_parent(struct sched_domain *sd, int flag) -{ - if (sd->parent && (sd->parent->flags & flag)) - return 1; - - return 0; -} - #else /* CONFIG_SMP */ struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { } @@ -980,7 +926,7 @@ struct sched_class { void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, - const struct cpumask *newmask); + const cpumask_t *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); @@ -1633,12 +1579,12 @@ extern cputime_t task_gtime(struct task_struct *p); #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); + const cpumask_t *new_mask); #else static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) + const cpumask_t *new_mask) { - if (!cpumask_test_cpu(0, new_mask)) + if (!cpu_isset(0, 
*new_mask)) return -EINVAL; return 0; } @@ -2249,8 +2195,10 @@ __trace_special(void *__tr, void *__data, } #endif -extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); -extern long sched_getaffinity(pid_t pid, struct cpumask *mask); +extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); +extern long sched_getaffinity(pid_t pid, cpumask_t *mask); + +extern int sched_mc_power_savings, sched_smt_power_savings; extern void normalize_rt_tasks(void); diff --git a/trunk/include/linux/topology.h b/trunk/include/linux/topology.h index e632d29f0544..0c5b5ac36d8e 100644 --- a/trunk/include/linux/topology.h +++ b/trunk/include/linux/topology.h @@ -125,8 +125,7 @@ int arch_update_cpu_topology(void); | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ - | sd_balance_for_mc_power()\ - | sd_power_saving_flags(),\ + | BALANCE_FOR_MC_POWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ } @@ -151,8 +150,7 @@ int arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ - | sd_balance_for_package_power()\ - | sd_power_saving_flags(),\ + | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/trunk/kernel/rcuclassic.c b/trunk/kernel/rcuclassic.c index 6ec495f60ead..0ff9b05706a6 100644 --- a/trunk/kernel/rcuclassic.c +++ b/trunk/kernel/rcuclassic.c @@ -394,7 +394,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) * unnecessarily. */ smp_mb(); - cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask); + cpumask_andnot(to_cpumask(rcp->cpumask), + cpu_online_mask, &nohz_cpu_mask); rcp->signaled = 0; } diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 27ba1d642f0f..f2095660efec 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -498,26 +498,18 @@ struct rt_rq { */ struct root_domain { atomic_t refcount; - cpumask_var_t span; - cpumask_var_t online; + cpumask_t span; + cpumask_t online; /* * The "RT overload" flag: it gets set if a CPU has more than * one runnable RT task. */ - cpumask_var_t rto_mask; + cpumask_t rto_mask; atomic_t rto_count; #ifdef CONFIG_SMP struct cpupri cpupri; #endif -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - /* - * Preferred wake up cpu nominated by sched_mc balance that will be - * used when most cpus are idle in the system indicating overall very - * low system utilisation. 
Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) - */ - unsigned int sched_mc_preferred_wakeup_cpu; -#endif }; /* @@ -1522,7 +1514,7 @@ static int tg_shares_up(struct task_group *tg, void *data) struct sched_domain *sd = data; int i; - for_each_cpu(i, sched_domain_span(sd)) { + for_each_cpu_mask(i, sd->span) { /* * If there are currently no tasks on the cpu pretend there * is one of average load so that when a new task gets to @@ -1543,7 +1535,7 @@ static int tg_shares_up(struct task_group *tg, void *data) if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) shares = tg->shares; - for_each_cpu(i, sched_domain_span(sd)) + for_each_cpu_mask(i, sd->span) update_group_shares_cpu(tg, i, shares, rq_weight); return 0; @@ -2109,17 +2101,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) int i; /* Skip over this group if it has no CPUs allowed */ - if (!cpumask_intersects(sched_group_cpus(group), - &p->cpus_allowed)) + if (!cpus_intersects(group->cpumask, p->cpus_allowed)) continue; - local_group = cpumask_test_cpu(this_cpu, - sched_group_cpus(group)); + local_group = cpu_isset(this_cpu, group->cpumask); /* Tally up the load of all CPUs in the group */ avg_load = 0; - for_each_cpu(i, sched_group_cpus(group)) { + for_each_cpu_mask_nr(i, group->cpumask) { /* Bias balancing toward cpus of our domain */ if (local_group) load = source_load(i, load_idx); @@ -2151,14 +2141,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) * find_idlest_cpu - find the idlest cpu among the cpus in group. */ static int -find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) +find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, + cpumask_t *tmp) { unsigned long load, min_load = ULONG_MAX; int idlest = -1; int i; /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { + cpus_and(*tmp, group->cpumask, p->cpus_allowed); + + for_each_cpu_mask_nr(i, *tmp) { load = weighted_cpuload(i); if (load < min_load || (load == min_load && i == this_cpu)) { @@ -2200,6 +2193,7 @@ static int sched_balance_self(int cpu, int flag) update_shares(sd); while (sd) { + cpumask_t span, tmpmask; struct sched_group *group; int new_cpu, weight; @@ -2208,13 +2202,14 @@ static int sched_balance_self(int cpu, int flag) continue; } + span = sd->span; group = find_idlest_group(sd, t, cpu); if (!group) { sd = sd->child; continue; } - new_cpu = find_idlest_cpu(group, t, cpu); + new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); if (new_cpu == -1 || new_cpu == cpu) { /* Now try balancing at a lower domain level of cpu */ sd = sd->child; @@ -2223,10 +2218,10 @@ static int sched_balance_self(int cpu, int flag) /* Now try balancing at a lower domain level of new_cpu */ cpu = new_cpu; - weight = cpumask_weight(sched_domain_span(sd)); sd = NULL; + weight = cpus_weight(span); for_each_domain(cpu, tmp) { - if (weight <= cpumask_weight(sched_domain_span(tmp))) + if (weight <= cpus_weight(tmp->span)) break; if (tmp->flags & flag) sd = tmp; @@ -2271,7 +2266,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) cpu = task_cpu(p); for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (cpu_isset(cpu, sd->span)) { update_shares(sd); break; } @@ -2320,7 +2315,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) else { struct sched_domain *sd; for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, 
sched_domain_span(sd))) { + if (cpu_isset(cpu, sd->span)) { schedstat_inc(sd, ttwu_wake_remote); break; } @@ -2851,7 +2846,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) struct rq *rq; rq = task_rq_lock(p, &flags); - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) + if (!cpu_isset(dest_cpu, p->cpus_allowed) || unlikely(!cpu_active(dest_cpu))) goto out; @@ -2916,7 +2911,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. */ - if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { + if (!cpu_isset(this_cpu, p->cpus_allowed)) { schedstat_inc(p, se.nr_failed_migrations_affine); return 0; } @@ -3091,7 +3086,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long *imbalance, enum cpu_idle_type idle, - int *sd_idle, const struct cpumask *cpus, int *balance) + int *sd_idle, const cpumask_t *cpus, int *balance) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -3127,11 +3122,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long sum_avg_load_per_task; unsigned long avg_load_per_task; - local_group = cpumask_test_cpu(this_cpu, - sched_group_cpus(group)); + local_group = cpu_isset(this_cpu, group->cpumask); if (local_group) - balance_cpu = cpumask_first(sched_group_cpus(group)); + balance_cpu = first_cpu(group->cpumask); /* Tally up the load of all CPUs in the group */ sum_weighted_load = sum_nr_running = avg_load = 0; @@ -3140,8 +3134,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, max_cpu_load = 0; min_cpu_load = ~0UL; - for_each_cpu_and(i, sched_group_cpus(group), cpus) { - struct rq *rq = cpu_rq(i); + for_each_cpu_mask_nr(i, group->cpumask) { + struct rq *rq; + + if (!cpu_isset(i, *cpus)) + continue; + + rq = cpu_rq(i); if (*sd_idle && rq->nr_running) *sd_idle = 0; @@ -3252,8 +3251,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ if ((sum_nr_running < min_nr_running) || (sum_nr_running == min_nr_running && - cpumask_first(sched_group_cpus(group)) > - cpumask_first(sched_group_cpus(group_min)))) { + first_cpu(group->cpumask) < + first_cpu(group_min->cpumask))) { group_min = group; min_nr_running = sum_nr_running; min_load_per_task = sum_weighted_load / @@ -3268,8 +3267,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (sum_nr_running <= group_capacity - 1) { if (sum_nr_running > leader_nr_running || (sum_nr_running == leader_nr_running && - cpumask_first(sched_group_cpus(group)) < - cpumask_first(sched_group_cpus(group_leader)))) { + first_cpu(group->cpumask) > + first_cpu(group_leader->cpumask))) { group_leader = group; leader_nr_running = sum_nr_running; } @@ -3395,10 +3394,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (this == group_leader && group_leader != group_min) { *imbalance = min_load_per_task; - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = - cpumask_first(sched_group_cpus(group_leader)); - } return group_min; } #endif @@ -3412,16 +3407,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ static struct rq * find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, - unsigned long imbalance, const struct cpumask *cpus) + unsigned 
long imbalance, const cpumask_t *cpus) { struct rq *busiest = NULL, *rq; unsigned long max_load = 0; int i; - for_each_cpu(i, sched_group_cpus(group)) { + for_each_cpu_mask_nr(i, group->cpumask) { unsigned long wl; - if (!cpumask_test_cpu(i, cpus)) + if (!cpu_isset(i, *cpus)) continue; rq = cpu_rq(i); @@ -3451,7 +3446,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, - int *balance, struct cpumask *cpus) + int *balance, cpumask_t *cpus) { int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; @@ -3459,7 +3454,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct rq *busiest; unsigned long flags; - cpumask_setall(cpus); + cpus_setall(*cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3519,8 +3514,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(all_pinned)) { - cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) + cpu_clear(cpu_of(busiest), *cpus); + if (!cpus_empty(*cpus)) goto redo; goto out_balanced; } @@ -3537,8 +3532,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ - if (!cpumask_test_cpu(this_cpu, - &busiest->curr->cpus_allowed)) { + if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; goto out_one_pinned; @@ -3613,7 +3607,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, */ static int load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, - struct cpumask *cpus) + cpumask_t *cpus) { struct sched_group *group; struct rq *busiest = NULL; @@ -3622,7 +3616,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, int sd_idle = 0; int all_pinned = 0; - cpumask_setall(cpus); + cpus_setall(*cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3666,71 +3660,17 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, double_unlock_balance(this_rq, busiest); if (unlikely(all_pinned)) { - cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) + cpu_clear(cpu_of(busiest), *cpus); + if (!cpus_empty(*cpus)) goto redo; } } if (!ld_moved) { - int active_balance = 0; - schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - - if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) - return -1; - - if (sd->nr_balance_failed++ < 2) - return -1; - - /* - * The only task running in a non-idle cpu can be moved to this - * cpu in an attempt to completely freeup the other CPU - * package. The same method used to move task in load_balance() - * have been extended for load_balance_newidle() to speedup - * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2) - * - * The package power saving logic comes from - * find_busiest_group(). If there are no imbalance, then - * f_b_g() will return NULL. However when sched_mc={1,2} then - * f_b_g() will select a group from which a running task may be - * pulled to this cpu in order to make the other package idle. 
- * If there is no opportunity to make a package idle and if - * there are no imbalance, then f_b_g() will return NULL and no - * action will be taken in load_balance_newidle(). - * - * Under normal task pull operation due to imbalance, there - * will be more than one task in the source run queue and - * move_tasks() will succeed. ld_moved will be true and this - * active balance code will not be triggered. - */ - - /* Lock busiest in correct order while this_rq is held */ - double_lock_balance(this_rq, busiest); - - /* - * don't kick the migration_thread, if the curr - * task on busiest cpu can't be moved to this_cpu - */ - if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { - double_unlock_balance(this_rq, busiest); - all_pinned = 1; - return ld_moved; - } - - if (!busiest->active_balance) { - busiest->active_balance = 1; - busiest->push_cpu = this_cpu; - active_balance = 1; - } - - double_unlock_balance(this_rq, busiest); - if (active_balance) - wake_up_process(busiest->migration_thread); - } else sd->nr_balance_failed = 0; @@ -3756,10 +3696,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; unsigned long next_balance = jiffies + HZ; - cpumask_var_t tmpmask; - - if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC)) - return; + cpumask_t tmpmask; for_each_domain(this_cpu, sd) { unsigned long interval; @@ -3770,7 +3707,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) if (sd->flags & SD_BALANCE_NEWIDLE) /* If we've pulled tasks over stop searching: */ pulled_task = load_balance_newidle(this_cpu, this_rq, - sd, tmpmask); + sd, &tmpmask); interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) @@ -3785,7 +3722,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) */ this_rq->next_balance = next_balance; } - free_cpumask_var(tmpmask); } /* @@ -3823,7 +3759,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) /* Search for an sd spanning us and the target CPU. 
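 * active_load_balance() runs in the migration thread of the busiest
 * CPU and pushes a single task over to push_cpu; requiring a domain
 * whose span contains both CPUs keeps that forced migration inside
 * one load-balancing scope.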
*/ for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && - cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + cpu_isset(busiest_cpu, sd->span)) break; } @@ -3842,9 +3778,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) #ifdef CONFIG_NO_HZ static struct { atomic_t load_balancer; - cpumask_var_t cpu_mask; + cpumask_t cpu_mask; } nohz ____cacheline_aligned = { .load_balancer = ATOMIC_INIT(-1), + .cpu_mask = CPU_MASK_NONE, }; /* @@ -3872,7 +3809,7 @@ int select_nohz_load_balancer(int stop_tick) int cpu = smp_processor_id(); if (stop_tick) { - cpumask_set_cpu(cpu, nohz.cpu_mask); + cpu_set(cpu, nohz.cpu_mask); cpu_rq(cpu)->in_nohz_recently = 1; /* @@ -3886,7 +3823,7 @@ int select_nohz_load_balancer(int stop_tick) } /* time for ilb owner also to sleep */ - if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { + if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) { if (atomic_read(&nohz.load_balancer) == cpu) atomic_set(&nohz.load_balancer, -1); return 0; @@ -3899,10 +3836,10 @@ int select_nohz_load_balancer(int stop_tick) } else if (atomic_read(&nohz.load_balancer) == cpu) return 1; } else { - if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) + if (!cpu_isset(cpu, nohz.cpu_mask)) return 0; - cpumask_clear_cpu(cpu, nohz.cpu_mask); + cpu_clear(cpu, nohz.cpu_mask); if (atomic_read(&nohz.load_balancer) == cpu) if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) @@ -3930,11 +3867,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) unsigned long next_balance = jiffies + 60*HZ; int update_next_balance = 0; int need_serialize; - cpumask_var_t tmp; - - /* Fails alloc? Rebalancing probably not a priority right now. */ - if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) - return; + cpumask_t tmp; for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -3959,7 +3892,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) } if (time_after_eq(jiffies, sd->last_balance + interval)) { - if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { + if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { /* * We've pulled tasks over so either we're no * longer idle, or one of our SMT siblings is @@ -3993,8 +3926,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) */ if (likely(update_next_balance)) rq->next_balance = next_balance; - - free_cpumask_var(tmp); } /* @@ -4019,13 +3950,12 @@ static void run_rebalance_domains(struct softirq_action *h) */ if (this_rq->idle_at_tick && atomic_read(&nohz.load_balancer) == this_cpu) { + cpumask_t cpus = nohz.cpu_mask; struct rq *rq; int balance_cpu; - for_each_cpu(balance_cpu, nohz.cpu_mask) { - if (balance_cpu == this_cpu) - continue; - + cpu_clear(this_cpu, cpus); + for_each_cpu_mask_nr(balance_cpu, cpus) { /* * If this cpu gets work to do, stop the load balancing * work being done for other cpus. Next load @@ -4063,7 +3993,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) rq->in_nohz_recently = 0; if (atomic_read(&nohz.load_balancer) == cpu) { - cpumask_clear_cpu(cpu, nohz.cpu_mask); + cpu_clear(cpu, nohz.cpu_mask); atomic_set(&nohz.load_balancer, -1); } @@ -4076,7 +4006,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * TBD: Traverse the sched domains and nominate * the nearest cpu in the nohz.cpu_mask. 
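 * For now first_cpu() simply picks the lowest-numbered CPU left in
 * nohz.cpu_mask, and resched_cpu() below kicks it so that it runs
 * the periodic balancer on behalf of all the tickless CPUs.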
*/ - int ilb = cpumask_first(nohz.cpu_mask); + int ilb = first_cpu(nohz.cpu_mask); if (ilb < nr_cpu_ids) resched_cpu(ilb); @@ -4088,7 +4018,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * cpus with ticks stopped, is it time for that to stop? */ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && - cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { + cpus_weight(nohz.cpu_mask) == num_online_cpus()) { resched_cpu(cpu); return; } @@ -4098,7 +4028,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * someone else, then no need raise the SCHED_SOFTIRQ */ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && - cpumask_test_cpu(cpu, nohz.cpu_mask)) + cpu_isset(cpu, nohz.cpu_mask)) return; #endif if (time_after_eq(jiffies, rq->next_balance)) @@ -5471,9 +5401,10 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) return retval; } -long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) { - cpumask_var_t cpus_allowed, new_mask; + cpumask_t cpus_allowed; + cpumask_t new_mask = *in_mask; struct task_struct *p; int retval; @@ -5495,14 +5426,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); - if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { - retval = -ENOMEM; - goto out_put_task; - } - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { - retval = -ENOMEM; - goto out_free_cpus_allowed; - } retval = -EPERM; if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; @@ -5511,41 +5434,37 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_unlock; - cpuset_cpus_allowed(p, cpus_allowed); - cpumask_and(new_mask, in_mask, cpus_allowed); + cpuset_cpus_allowed(p, &cpus_allowed); + cpus_and(new_mask, new_mask, cpus_allowed); again: - retval = set_cpus_allowed_ptr(p, new_mask); + retval = set_cpus_allowed_ptr(p, &new_mask); if (!retval) { - cpuset_cpus_allowed(p, cpus_allowed); - if (!cpumask_subset(new_mask, cpus_allowed)) { + cpuset_cpus_allowed(p, &cpus_allowed); + if (!cpus_subset(new_mask, cpus_allowed)) { /* * We must have raced with a concurrent cpuset * update. Just reset the cpus_allowed to the * cpuset's cpus_allowed */ - cpumask_copy(new_mask, cpus_allowed); + new_mask = cpus_allowed; goto again; } } out_unlock: - free_cpumask_var(new_mask); -out_free_cpus_allowed: - free_cpumask_var(cpus_allowed); -out_put_task: put_task_struct(p); put_online_cpus(); return retval; } static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, - struct cpumask *new_mask) + cpumask_t *new_mask) { - if (len < cpumask_size()) - cpumask_clear(new_mask); - else if (len > cpumask_size()) - len = cpumask_size(); - + if (len < sizeof(cpumask_t)) { + memset(new_mask, 0, sizeof(cpumask_t)); + } else if (len > sizeof(cpumask_t)) { + len = sizeof(cpumask_t); + } return copy_from_user(new_mask, user_mask_ptr, len) ? 
-EFAULT : 0; } @@ -5558,20 +5477,17 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { - cpumask_var_t new_mask; + cpumask_t new_mask; int retval; - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) - return -ENOMEM; + retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); + if (retval) + return retval; - retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); - if (retval == 0) - retval = sched_setaffinity(pid, new_mask); - free_cpumask_var(new_mask); - return retval; + return sched_setaffinity(pid, &new_mask); } -long sched_getaffinity(pid_t pid, struct cpumask *mask) +long sched_getaffinity(pid_t pid, cpumask_t *mask) { struct task_struct *p; int retval; @@ -5588,7 +5504,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) if (retval) goto out_unlock; - cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); + cpus_and(*mask, p->cpus_allowed, cpu_online_map); out_unlock: read_unlock(&tasklist_lock); @@ -5607,24 +5523,19 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { int ret; - cpumask_var_t mask; + cpumask_t mask; - if (len < cpumask_size()) + if (len < sizeof(cpumask_t)) return -EINVAL; - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + ret = sched_getaffinity(pid, &mask); + if (ret < 0) + return ret; - ret = sched_getaffinity(pid, mask); - if (ret == 0) { - if (copy_to_user(user_mask_ptr, mask, cpumask_size())) - ret = -EFAULT; - else - ret = cpumask_size(); - } - free_cpumask_var(mask); + if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) + return -EFAULT; - return ret; + return sizeof(cpumask_t); } /** @@ -5966,7 +5877,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); idle->prio = idle->normal_prio = MAX_PRIO; - cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); + idle->cpus_allowed = cpumask_of_cpu(cpu); __set_task_cpu(idle, cpu); rq->curr = rq->idle = idle; @@ -5993,9 +5904,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * indicates which cpus entered this state. This is used * in the rcu update to wait only for active cpus. For system * which do not switch off the HZ timer nohz_cpu_mask should - * always be CPU_BITS_NONE. + * always be CPU_MASK_NONE. */ -cpumask_var_t nohz_cpu_mask; +cpumask_t nohz_cpu_mask = CPU_MASK_NONE; /* * Increase the granularity value when there are more CPUs, @@ -6050,7 +5961,7 @@ static inline void sched_init_granularity(void) * task must not exit() & deallocate itself prematurely. The * call is not atomic; no spinlocks may be held. 
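 * If the task is currently running on a CPU outside the new mask it
 * is pushed off via the per-CPU migration thread, and the caller
 * sleeps until that migration has completed.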
*/ -int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) { struct migration_req req; unsigned long flags; @@ -6058,13 +5969,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) int ret = 0; rq = task_rq_lock(p, &flags); - if (!cpumask_intersects(new_mask, cpu_online_mask)) { + if (!cpus_intersects(*new_mask, cpu_online_map)) { ret = -EINVAL; goto out; } if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && - !cpumask_equal(&p->cpus_allowed, new_mask))) { + !cpus_equal(p->cpus_allowed, *new_mask))) { ret = -EINVAL; goto out; } @@ -6072,15 +5983,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) if (p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + p->cpus_allowed = *new_mask; + p->rt.nr_cpus_allowed = cpus_weight(*new_mask); } /* Can the task run on the task's current CPU? If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), new_mask)) + if (cpu_isset(task_cpu(p), *new_mask)) goto out; - if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { + if (migrate_task(p, any_online_cpu(*new_mask), &req)) { /* Need help from migration thread: drop lock and wait. */ task_rq_unlock(rq, &flags); wake_up_process(rq->migration_thread); @@ -6122,7 +6033,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (task_cpu(p) != src_cpu) goto done; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!cpu_isset(dest_cpu, p->cpus_allowed)) goto fail; on_rq = p->se.on_rq; @@ -6219,43 +6130,50 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) */ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { + unsigned long flags; + cpumask_t mask; + struct rq *rq; int dest_cpu; - /* FIXME: Use cpumask_of_node here. */ - cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu)); - const struct cpumask *nodemask = &_nodemask; - -again: - /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) - goto move; - - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); - if (dest_cpu < nr_cpu_ids) - goto move; - - /* No more Mr. Nice Guy. */ - if (dest_cpu >= nr_cpu_ids) { - cpuset_cpus_allowed_locked(p, &p->cpus_allowed); - dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, dead_cpu); - } - } + do { + /* On same node? */ + mask = node_to_cpumask(cpu_to_node(dead_cpu)); + cpus_and(mask, mask, p->cpus_allowed); + dest_cpu = any_online_cpu(mask); -move: - /* It can have affinity changed while we were choosing. */ - if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) - goto again; + /* On any allowed CPU? */ + if (dest_cpu >= nr_cpu_ids) + dest_cpu = any_online_cpu(p->cpus_allowed); + + /* No more Mr. Nice Guy. 
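+	 * Every CPU this task was allowed on is now offline, so widen
+	 * the affinity to the task's cpuset below and retry with any
+	 * online CPU from that set.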
*/ + if (dest_cpu >= nr_cpu_ids) { + cpumask_t cpus_allowed; + + cpuset_cpus_allowed_locked(p, &cpus_allowed); + /* + * Try to stay on the same cpuset, where the + * current cpuset may be a subset of all cpus. + * The cpuset_cpus_allowed_locked() variant of + * cpuset_cpus_allowed() will not block. It must be + * called within calls to cpuset_lock/cpuset_unlock. + */ + rq = task_rq_lock(p, &flags); + p->cpus_allowed = cpus_allowed; + dest_cpu = any_online_cpu(p->cpus_allowed); + task_rq_unlock(rq, &flags); + + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu%d\n", + task_pid_nr(p), p->comm, dead_cpu); + } + } + } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); } /* @@ -6267,7 +6185,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) */ static void migrate_nr_uninterruptible(struct rq *rq_src) { - struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); + struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); unsigned long flags; local_irq_save(flags); @@ -6557,7 +6475,7 @@ static void set_rq_online(struct rq *rq) if (!rq->online) { const struct sched_class *class; - cpumask_set_cpu(rq->cpu, rq->rd->online); + cpu_set(rq->cpu, rq->rd->online); rq->online = 1; for_each_class(class) { @@ -6577,7 +6495,7 @@ static void set_rq_offline(struct rq *rq) class->rq_offline(rq); } - cpumask_clear_cpu(rq->cpu, rq->rd->online); + cpu_clear(rq->cpu, rq->rd->online); rq->online = 0; } } @@ -6618,7 +6536,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + BUG_ON(!cpu_isset(cpu, rq->rd->span)); set_rq_online(rq); } @@ -6632,7 +6550,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; /* Unbind it from offline cpu so it can run. Fall thru. 
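 * kthread_stop() can only complete once the thread is able to run,
 * and a thread left bound to a CPU that never came online would
 * never be scheduled again.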
*/ kthread_bind(cpu_rq(cpu)->migration_thread, - cpumask_any(cpu_online_mask)); + any_online_cpu(cpu_online_map)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; @@ -6682,7 +6600,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + BUG_ON(!cpu_isset(cpu, rq->rd->span)); set_rq_offline(rq); } spin_unlock_irqrestore(&rq->lock, flags); @@ -6721,13 +6639,13 @@ early_initcall(migration_init); #ifdef CONFIG_SCHED_DEBUG static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, - struct cpumask *groupmask) + cpumask_t *groupmask) { struct sched_group *group = sd->groups; char str[256]; - cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); - cpumask_clear(groupmask); + cpulist_scnprintf(str, sizeof(str), &sd->span); + cpus_clear(*groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@ -6741,11 +6659,11 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_CONT "span %s level %s\n", str, sd->name); - if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (!cpu_isset(cpu, sd->span)) { printk(KERN_ERR "ERROR: domain->span does not contain " "CPU%d\n", cpu); } - if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { + if (!cpu_isset(cpu, group->cpumask)) { printk(KERN_ERR "ERROR: domain->groups does not contain" " CPU%d\n", cpu); } @@ -6765,32 +6683,31 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!cpumask_weight(sched_group_cpus(group))) { + if (!cpus_weight(group->cpumask)) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: empty group\n"); break; } - if (cpumask_intersects(groupmask, sched_group_cpus(group))) { + if (cpus_intersects(*groupmask, group->cpumask)) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: repeated CPUs\n"); break; } - cpumask_or(groupmask, groupmask, sched_group_cpus(group)); + cpus_or(*groupmask, *groupmask, group->cpumask); - cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); + cpulist_scnprintf(str, sizeof(str), &group->cpumask); printk(KERN_CONT " %s", str); group = group->next; } while (group != sd->groups); printk(KERN_CONT "\n"); - if (!cpumask_equal(sched_domain_span(sd), groupmask)) + if (!cpus_equal(sd->span, *groupmask)) printk(KERN_ERR "ERROR: groups don't span domain->span\n"); - if (sd->parent && - !cpumask_subset(groupmask, sched_domain_span(sd->parent))) + if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) printk(KERN_ERR "ERROR: parent span is not a superset " "of domain->span\n"); return 0; @@ -6798,7 +6715,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, static void sched_domain_debug(struct sched_domain *sd, int cpu) { - cpumask_var_t groupmask; + cpumask_t *groupmask; int level = 0; if (!sd) { @@ -6808,7 +6725,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); - if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) { + groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!groupmask) { printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); return; } @@ -6821,7 +6739,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!sd) break; } - free_cpumask_var(groupmask); + kfree(groupmask); } #else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) @@ -6829,7 
+6747,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) static int sd_degenerate(struct sched_domain *sd) { - if (cpumask_weight(sched_domain_span(sd)) == 1) + if (cpus_weight(sd->span) == 1) return 1; /* Following flags need at least 2 groups */ @@ -6860,7 +6778,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) if (sd_degenerate(parent)) return 1; - if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) + if (!cpus_equal(sd->span, parent->span)) return 0; /* Does parent contain flags not in child? */ @@ -6884,16 +6802,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } -static void free_rootdomain(struct root_domain *rd) -{ - cpupri_cleanup(&rd->cpupri); - - free_cpumask_var(rd->rto_mask); - free_cpumask_var(rd->online); - free_cpumask_var(rd->span); - kfree(rd); -} - static void rq_attach_root(struct rq *rq, struct root_domain *rd) { unsigned long flags; @@ -6903,63 +6811,38 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) if (rq->rd) { struct root_domain *old_rd = rq->rd; - if (cpumask_test_cpu(rq->cpu, old_rd->online)) + if (cpu_isset(rq->cpu, old_rd->online)) set_rq_offline(rq); - cpumask_clear_cpu(rq->cpu, old_rd->span); + cpu_clear(rq->cpu, old_rd->span); if (atomic_dec_and_test(&old_rd->refcount)) - free_rootdomain(old_rd); + kfree(old_rd); } atomic_inc(&rd->refcount); rq->rd = rd; - cpumask_set_cpu(rq->cpu, rd->span); - if (cpumask_test_cpu(rq->cpu, cpu_online_mask)) + cpu_set(rq->cpu, rd->span); + if (cpu_isset(rq->cpu, cpu_online_map)) set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); } -static int init_rootdomain(struct root_domain *rd, bool bootmem) +static void init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); - if (bootmem) { - alloc_bootmem_cpumask_var(&def_root_domain.span); - alloc_bootmem_cpumask_var(&def_root_domain.online); - alloc_bootmem_cpumask_var(&def_root_domain.rto_mask); - cpupri_init(&rd->cpupri, true); - return 0; - } - - if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) - goto free_rd; - if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) - goto free_span; - if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) - goto free_online; - - if (cpupri_init(&rd->cpupri, false) != 0) - goto free_rto_mask; - return 0; + cpus_clear(rd->span); + cpus_clear(rd->online); -free_rto_mask: - free_cpumask_var(rd->rto_mask); -free_online: - free_cpumask_var(rd->online); -free_span: - free_cpumask_var(rd->span); -free_rd: - kfree(rd); - return -ENOMEM; + cpupri_init(&rd->cpupri); } static void init_defrootdomain(void) { - init_rootdomain(&def_root_domain, true); - + init_rootdomain(&def_root_domain); atomic_set(&def_root_domain.refcount, 1); } @@ -6971,10 +6854,7 @@ static struct root_domain *alloc_rootdomain(void) if (!rd) return NULL; - if (init_rootdomain(rd, false) != 0) { - kfree(rd); - return NULL; - } + init_rootdomain(rd); return rd; } @@ -7016,12 +6896,19 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) } /* cpus with isolated domains */ -static cpumask_var_t cpu_isolated_map; +static cpumask_t cpu_isolated_map = CPU_MASK_NONE; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { - cpulist_parse(str, cpu_isolated_map); + static int __initdata ints[NR_CPUS]; + int i; + + str = get_options(str, ARRAY_SIZE(ints), ints); + cpus_clear(cpu_isolated_map); + for (i = 1; i <= ints[0]; i++) + if (ints[i] < NR_CPUS) + cpu_set(ints[i], 
cpu_isolated_map); return 1; } @@ -7030,43 +6917,42 @@ __setup("isolcpus=", isolated_cpu_setup); /* * init_sched_build_groups takes the cpumask we wish to span, and a pointer * to a function which identifies what group (along with sched group) a CPU - * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids - * (due to the fact that we keep track of groups covered with a struct cpumask). + * belongs to. The return value of group_fn must be >= 0 and < NR_CPUS + * (due to the fact that we keep track of groups covered with a cpumask_t). * * init_sched_build_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ static void -init_sched_build_groups(const struct cpumask *span, - const struct cpumask *cpu_map, - int (*group_fn)(int cpu, const struct cpumask *cpu_map, +init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, + int (*group_fn)(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, - struct cpumask *tmpmask), - struct cpumask *covered, struct cpumask *tmpmask) + cpumask_t *tmpmask), + cpumask_t *covered, cpumask_t *tmpmask) { struct sched_group *first = NULL, *last = NULL; int i; - cpumask_clear(covered); + cpus_clear(*covered); - for_each_cpu(i, span) { + for_each_cpu_mask_nr(i, *span) { struct sched_group *sg; int group = group_fn(i, cpu_map, &sg, tmpmask); int j; - if (cpumask_test_cpu(i, covered)) + if (cpu_isset(i, *covered)) continue; - cpumask_clear(sched_group_cpus(sg)); + cpus_clear(sg->cpumask); sg->__cpu_power = 0; - for_each_cpu(j, span) { + for_each_cpu_mask_nr(j, *span) { if (group_fn(j, cpu_map, NULL, tmpmask) != group) continue; - cpumask_set_cpu(j, covered); - cpumask_set_cpu(j, sched_group_cpus(sg)); + cpu_set(j, *covered); + cpu_set(j, sg->cpumask); } if (!first) first = sg; @@ -7130,10 +7016,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) * should be one that prevents unnecessary balancing, but also spreads tasks * out optimally. */ -static void sched_domain_node_span(int node, struct cpumask *span) +static void sched_domain_node_span(int node, cpumask_t *span) { nodemask_t used_nodes; - /* FIXME: use cpumask_of_node() */ node_to_cpumask_ptr(nodemask, node); int i; @@ -7154,34 +7039,19 @@ static void sched_domain_node_span(int node, struct cpumask *span) int sched_smt_power_savings = 0, sched_mc_power_savings = 0; -/* - * The cpus mask in sched_group and sched_domain hangs off the end. - * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space - * for nr_cpu_ids < CONFIG_NR_CPUS. 
- */ -struct static_sched_group { - struct sched_group sg; - DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); -}; - -struct static_sched_domain { - struct sched_domain sd; - DECLARE_BITMAP(span, CONFIG_NR_CPUS); -}; - /* * SMT sched-domains: */ #ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); static int -cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *unused) { if (sg) - *sg = &per_cpu(sched_group_cpus, cpu).sg; + *sg = &per_cpu(sched_group_cpus, cpu); return cpu; } #endif /* CONFIG_SCHED_SMT */ @@ -7190,55 +7060,56 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, * multi-core sched-domains: */ #ifdef CONFIG_SCHED_MC -static DEFINE_PER_CPU(struct static_sched_domain, core_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); +static DEFINE_PER_CPU(struct sched_domain, core_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_core); #endif /* CONFIG_SCHED_MC */ #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) static int -cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *mask) { int group; - cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); - group = cpumask_first(mask); + *mask = per_cpu(cpu_sibling_map, cpu); + cpus_and(*mask, *mask, *cpu_map); + group = first_cpu(*mask); if (sg) - *sg = &per_cpu(sched_group_core, group).sg; + *sg = &per_cpu(sched_group_core, group); return group; } #elif defined(CONFIG_SCHED_MC) static int -cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *unused) { if (sg) - *sg = &per_cpu(sched_group_core, cpu).sg; + *sg = &per_cpu(sched_group_core, cpu); return cpu; } #endif -static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_phys); static int -cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *mask) { int group; #ifdef CONFIG_SCHED_MC - /* FIXME: Use cpu_coregroup_mask. 
*/ - *mask = cpu_coregroup_map(cpu); + *mask = *cpu_coregroup_mask(cpu); cpus_and(*mask, *mask, *cpu_map); - group = cpumask_first(mask); + group = first_cpu(*mask); #elif defined(CONFIG_SCHED_SMT) - cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); - group = cpumask_first(mask); + *mask = per_cpu(cpu_sibling_map, cpu); + cpus_and(*mask, *mask, *cpu_map); + group = first_cpu(*mask); #else group = cpu; #endif if (sg) - *sg = &per_cpu(sched_group_phys, group).sg; + *sg = &per_cpu(sched_group_phys, group); return group; } @@ -7252,21 +7123,19 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains); static struct sched_group ***sched_group_nodes_bycpu; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); +static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); -static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *nodemask) +static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg, cpumask_t *nodemask) { int group; - /* FIXME: use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu)); - cpumask_and(nodemask, pnodemask, cpu_map); - group = cpumask_first(nodemask); + *nodemask = node_to_cpumask(cpu_to_node(cpu)); + cpus_and(*nodemask, *nodemask, *cpu_map); + group = first_cpu(*nodemask); if (sg) - *sg = &per_cpu(sched_group_allnodes, group).sg; + *sg = &per_cpu(sched_group_allnodes, group); return group; } @@ -7278,11 +7147,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) if (!sg) return; do { - for_each_cpu(j, sched_group_cpus(sg)) { + for_each_cpu_mask_nr(j, sg->cpumask) { struct sched_domain *sd; - sd = &per_cpu(phys_domains, j).sd; - if (j != cpumask_first(sched_group_cpus(sd->groups))) { + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { /* * Only add "power" once for each * physical package. 
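 * (CPUs other than the first one in each phys group skip the
 * accumulation, so every package contributes its cpu_power exactly
 * once)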
@@ -7299,12 +7168,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) #ifdef CONFIG_NUMA /* Free memory allocated for various sched_group structures */ -static void free_sched_groups(const struct cpumask *cpu_map, - struct cpumask *nodemask) +static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { int cpu, i; - for_each_cpu(cpu, cpu_map) { + for_each_cpu_mask_nr(cpu, *cpu_map) { struct sched_group **sched_group_nodes = sched_group_nodes_bycpu[cpu]; @@ -7313,11 +7181,10 @@ static void free_sched_groups(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; - /* FIXME: Use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, i); - cpus_and(*nodemask, *pnodemask, *cpu_map); - if (cpumask_empty(nodemask)) + *nodemask = node_to_cpumask(i); + cpus_and(*nodemask, *nodemask, *cpu_map); + if (cpus_empty(*nodemask)) continue; if (sg == NULL) @@ -7335,8 +7202,7 @@ static void free_sched_groups(const struct cpumask *cpu_map, } } #else /* !CONFIG_NUMA */ -static void free_sched_groups(const struct cpumask *cpu_map, - struct cpumask *nodemask) +static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { } #endif /* CONFIG_NUMA */ @@ -7362,7 +7228,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) WARN_ON(!sd || !sd->groups); - if (cpu != cpumask_first(sched_group_cpus(sd->groups))) + if (cpu != first_cpu(sd->groups->cpumask)) return; child = sd->child; @@ -7427,6 +7293,48 @@ SD_INIT_FUNC(CPU) SD_INIT_FUNC(MC) #endif +/* + * To minimize stack usage kmalloc room for cpumasks and share the + * space as the usage in build_sched_domains() dictates. Used only + * if the amount of space is significant. + */ +struct allmasks { + cpumask_t tmpmask; /* make this one first */ + union { + cpumask_t nodemask; + cpumask_t this_sibling_map; + cpumask_t this_core_map; + }; + cpumask_t send_covered; + +#ifdef CONFIG_NUMA + cpumask_t domainspan; + cpumask_t covered; + cpumask_t notcovered; +#endif +}; + +#if NR_CPUS > 128 +#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v +static inline void sched_cpumask_alloc(struct allmasks **masks) +{ + *masks = kmalloc(sizeof(**masks), GFP_KERNEL); +} +static inline void sched_cpumask_free(struct allmasks *masks) +{ + kfree(masks); +} +#else +#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v +static inline void sched_cpumask_alloc(struct allmasks **masks) +{ } +static inline void sched_cpumask_free(struct allmasks *masks) +{ } +#endif + +#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ + ((unsigned long)(a) + offsetof(struct allmasks, v)) + static int default_relax_domain_level = -1; static int __init setup_relax_domain_level(char *str) @@ -7466,38 +7374,17 @@ static void set_domain_attribute(struct sched_domain *sd, * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static int __build_sched_domains(const struct cpumask *cpu_map, +static int __build_sched_domains(const cpumask_t *cpu_map, struct sched_domain_attr *attr) { - int i, err = -ENOMEM; + int i; struct root_domain *rd; - cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered, - tmpmask; + SCHED_CPUMASK_DECLARE(allmasks); + cpumask_t *tmpmask; #ifdef CONFIG_NUMA - cpumask_var_t domainspan, covered, notcovered; struct sched_group **sched_group_nodes = NULL; int sd_allnodes = 0; - if (!alloc_cpumask_var(&domainspan, GFP_KERNEL)) - goto out; - if (!alloc_cpumask_var(&covered, 
GFP_KERNEL)) - goto free_domainspan; - if (!alloc_cpumask_var(¬covered, GFP_KERNEL)) - goto free_covered; -#endif - - if (!alloc_cpumask_var(&nodemask, GFP_KERNEL)) - goto free_notcovered; - if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL)) - goto free_nodemask; - if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL)) - goto free_this_sibling_map; - if (!alloc_cpumask_var(&send_covered, GFP_KERNEL)) - goto free_this_core_map; - if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) - goto free_send_covered; - -#ifdef CONFIG_NUMA /* * Allocate the per-node list of sched groups */ @@ -7505,37 +7392,54 @@ static int __build_sched_domains(const struct cpumask *cpu_map, GFP_KERNEL); if (!sched_group_nodes) { printk(KERN_WARNING "Can not alloc sched group node list\n"); - goto free_tmpmask; + return -ENOMEM; } #endif rd = alloc_rootdomain(); if (!rd) { printk(KERN_WARNING "Cannot alloc root domain\n"); - goto free_sched_groups; +#ifdef CONFIG_NUMA + kfree(sched_group_nodes); +#endif + return -ENOMEM; } + /* get space for all scratch cpumask variables */ + sched_cpumask_alloc(&allmasks); + if (!allmasks) { + printk(KERN_WARNING "Cannot alloc cpumask array\n"); + kfree(rd); #ifdef CONFIG_NUMA - sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes; + kfree(sched_group_nodes); +#endif + return -ENOMEM; + } + + tmpmask = (cpumask_t *)allmasks; + + +#ifdef CONFIG_NUMA + sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; #endif /* * Set up domains for cpus specified by the cpu_map. */ - for_each_cpu(i, cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd = NULL, *p; + SCHED_CPUMASK_VAR(nodemask, allmasks); - /* FIXME: use cpumask_of_node */ *nodemask = node_to_cpumask(cpu_to_node(i)); cpus_and(*nodemask, *nodemask, *cpu_map); #ifdef CONFIG_NUMA - if (cpumask_weight(cpu_map) > - SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { + if (cpus_weight(*cpu_map) > + SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { sd = &per_cpu(allnodes_domains, i); SD_INIT(sd, ALLNODES); set_domain_attribute(sd, attr); - cpumask_copy(sched_domain_span(sd), cpu_map); + sd->span = *cpu_map; cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); p = sd; sd_allnodes = 1; @@ -7545,19 +7449,18 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = &per_cpu(node_domains, i); SD_INIT(sd, NODE); set_domain_attribute(sd, attr); - sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); + sched_domain_node_span(cpu_to_node(i), &sd->span); sd->parent = p; if (p) p->child = sd; - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; - sd = &per_cpu(phys_domains, i).sd; + sd = &per_cpu(phys_domains, i); SD_INIT(sd, CPU); set_domain_attribute(sd, attr); - cpumask_copy(sched_domain_span(sd), nodemask); + sd->span = *nodemask; sd->parent = p; if (p) p->child = sd; @@ -7565,12 +7468,11 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC p = sd; - sd = &per_cpu(core_domains, i).sd; + sd = &per_cpu(core_domains, i); SD_INIT(sd, MC); set_domain_attribute(sd, attr); - *sched_domain_span(sd) = cpu_coregroup_map(i); - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + sd->span = *cpu_coregroup_mask(i); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); @@ -7578,11 +7480,11 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_SMT p = sd; - sd = 
&per_cpu(cpu_domains, i).sd; + sd = &per_cpu(cpu_domains, i); SD_INIT(sd, SIBLING); set_domain_attribute(sd, attr); - cpumask_and(sched_domain_span(sd), - &per_cpu(cpu_sibling_map, i), cpu_map); + sd->span = per_cpu(cpu_sibling_map, i); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); @@ -7591,10 +7493,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu(i, cpu_map) { - cpumask_and(this_sibling_map, - &per_cpu(cpu_sibling_map, i), cpu_map); - if (i != cpumask_first(this_sibling_map)) + for_each_cpu_mask_nr(i, *cpu_map) { + SCHED_CPUMASK_VAR(this_sibling_map, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + + *this_sibling_map = per_cpu(cpu_sibling_map, i); + cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map); + if (i != first_cpu(*this_sibling_map)) continue; init_sched_build_groups(this_sibling_map, cpu_map, @@ -7605,11 +7510,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC /* Set up multi-core groups */ - for_each_cpu(i, cpu_map) { - /* FIXME: Use cpu_coregroup_mask */ - *this_core_map = cpu_coregroup_map(i); + for_each_cpu_mask_nr(i, *cpu_map) { + SCHED_CPUMASK_VAR(this_core_map, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + + *this_core_map = *cpu_coregroup_mask(i); cpus_and(*this_core_map, *this_core_map, *cpu_map); - if (i != cpumask_first(this_core_map)) + if (i != first_cpu(*this_core_map)) continue; init_sched_build_groups(this_core_map, cpu_map, @@ -7620,10 +7527,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Set up physical groups */ for (i = 0; i < nr_node_ids; i++) { - /* FIXME: Use cpumask_of_node */ + SCHED_CPUMASK_VAR(nodemask, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + *nodemask = node_to_cpumask(i); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpumask_empty(nodemask)) + if (cpus_empty(*nodemask)) continue; init_sched_build_groups(nodemask, cpu_map, @@ -7634,6 +7543,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_NUMA /* Set up node groups */ if (sd_allnodes) { + SCHED_CPUMASK_VAR(send_covered, allmasks); + init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group, send_covered, tmpmask); @@ -7642,58 +7553,58 @@ static int __build_sched_domains(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { /* Set up node groups */ struct sched_group *sg, *prev; + SCHED_CPUMASK_VAR(nodemask, allmasks); + SCHED_CPUMASK_VAR(domainspan, allmasks); + SCHED_CPUMASK_VAR(covered, allmasks); int j; - /* FIXME: Use cpumask_of_node */ *nodemask = node_to_cpumask(i); - cpumask_clear(covered); + cpus_clear(*covered); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpumask_empty(nodemask)) { + if (cpus_empty(*nodemask)) { sched_group_nodes[i] = NULL; continue; } sched_domain_node_span(i, domainspan); - cpumask_and(domainspan, domainspan, cpu_map); + cpus_and(*domainspan, *domainspan, *cpu_map); - sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), - GFP_KERNEL, i); + sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING "Can not alloc domain group for " "node %d\n", i); goto error; } sched_group_nodes[i] = sg; - for_each_cpu(j, nodemask) { + for_each_cpu_mask_nr(j, *nodemask) { struct sched_domain *sd; sd = &per_cpu(node_domains, j); sd->groups = sg; } sg->__cpu_power = 0; - cpumask_copy(sched_group_cpus(sg), 
nodemask); + sg->cpumask = *nodemask; sg->next = sg; - cpumask_or(covered, covered, nodemask); + cpus_or(*covered, *covered, *nodemask); prev = sg; for (j = 0; j < nr_node_ids; j++) { + SCHED_CPUMASK_VAR(notcovered, allmasks); int n = (i + j) % nr_node_ids; - /* FIXME: Use cpumask_of_node */ node_to_cpumask_ptr(pnodemask, n); - cpumask_complement(notcovered, covered); - cpumask_and(tmpmask, notcovered, cpu_map); - cpumask_and(tmpmask, tmpmask, domainspan); - if (cpumask_empty(tmpmask)) + cpus_complement(*notcovered, *covered); + cpus_and(*tmpmask, *notcovered, *cpu_map); + cpus_and(*tmpmask, *tmpmask, *domainspan); + if (cpus_empty(*tmpmask)) break; - cpumask_and(tmpmask, tmpmask, pnodemask); - if (cpumask_empty(tmpmask)) + cpus_and(*tmpmask, *tmpmask, *pnodemask); + if (cpus_empty(*tmpmask)) continue; - sg = kmalloc_node(sizeof(struct sched_group) + - cpumask_size(), + sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING @@ -7701,9 +7612,9 @@ static int __build_sched_domains(const struct cpumask *cpu_map, goto error; } sg->__cpu_power = 0; - cpumask_copy(sched_group_cpus(sg), tmpmask); + sg->cpumask = *tmpmask; sg->next = prev->next; - cpumask_or(covered, covered, tmpmask); + cpus_or(*covered, *covered, *tmpmask); prev->next = sg; prev = sg; } @@ -7712,22 +7623,22 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Calculate CPU power for physical packages and nodes */ #ifdef CONFIG_SCHED_SMT - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(cpu_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(cpu_domains, i); init_sched_groups_power(i, sd); } #endif #ifdef CONFIG_SCHED_MC - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(core_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(core_domains, i); init_sched_groups_power(i, sd); } #endif - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(phys_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(phys_domains, i); init_sched_groups_power(i, sd); } @@ -7739,78 +7650,53 @@ static int __build_sched_domains(const struct cpumask *cpu_map, if (sd_allnodes) { struct sched_group *sg; - cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg, + cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, tmpmask); init_numa_sched_groups_power(sg); } #endif /* Attach the domains */ - for_each_cpu(i, cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i).sd; + sd = &per_cpu(cpu_domains, i); #elif defined(CONFIG_SCHED_MC) - sd = &per_cpu(core_domains, i).sd; + sd = &per_cpu(core_domains, i); #else - sd = &per_cpu(phys_domains, i).sd; + sd = &per_cpu(phys_domains, i); #endif cpu_attach_domain(sd, rd, i); } - err = 0; - -free_tmpmask: - free_cpumask_var(tmpmask); -free_send_covered: - free_cpumask_var(send_covered); -free_this_core_map: - free_cpumask_var(this_core_map); -free_this_sibling_map: - free_cpumask_var(this_sibling_map); -free_nodemask: - free_cpumask_var(nodemask); -free_notcovered: -#ifdef CONFIG_NUMA - free_cpumask_var(notcovered); -free_covered: - free_cpumask_var(covered); -free_domainspan: - free_cpumask_var(domainspan); -out: -#endif - return err; - -free_sched_groups: -#ifdef CONFIG_NUMA - kfree(sched_group_nodes); -#endif - goto free_tmpmask; + sched_cpumask_free(allmasks); + return 0; #ifdef CONFIG_NUMA error: free_sched_groups(cpu_map, 
tmpmask); - free_rootdomain(rd); - goto free_tmpmask; + sched_cpumask_free(allmasks); + kfree(rd); + return -ENOMEM; #endif } -static int build_sched_domains(const struct cpumask *cpu_map) +static int build_sched_domains(const cpumask_t *cpu_map) { return __build_sched_domains(cpu_map, NULL); } -static struct cpumask *doms_cur; /* current sched domains */ +static cpumask_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; /* attributes of custom domains in 'doms_cur' */ /* * Special case: If a kmalloc of a doms_cur partition (array of - * cpumask) fails, then fallback to a single sched domain, - * as determined by the single cpumask fallback_doms. + * cpumask_t) fails, then fall back to a single sched domain, + * as determined by the single cpumask_t fallback_doms. */ -static cpumask_var_t fallback_doms; +static cpumask_t fallback_doms; /* * arch_update_cpu_topology lets virtualized architectures update the @@ -7827,16 +7713,16 @@ int __attribute__((weak)) arch_update_cpu_topology(void) * For now this just excludes isolated cpus, but could be used to * exclude other special cases in the future. */ -static int arch_init_sched_domains(const struct cpumask *cpu_map) +static int arch_init_sched_domains(const cpumask_t *cpu_map) { int err; arch_update_cpu_topology(); ndoms_cur = 1; - doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); + doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); if (!doms_cur) - doms_cur = fallback_doms; - cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); + doms_cur = &fallback_doms; + cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); dattr_cur = NULL; err = build_sched_domains(doms_cur); register_sched_domain_sysctl(); @@ -7844,8 +7730,8 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) return err; } -static void arch_destroy_sched_domains(const struct cpumask *cpu_map, - struct cpumask *tmpmask) +static void arch_destroy_sched_domains(const cpumask_t *cpu_map, + cpumask_t *tmpmask) { free_sched_groups(cpu_map, tmpmask); } @@ -7854,16 +7740,15 @@ static void arch_destroy_sched_domains(const struct cpumask *cpu_map, * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ -static void detach_destroy_domains(const struct cpumask *cpu_map) +static void detach_destroy_domains(const cpumask_t *cpu_map) { - /* Save because hotplug lock held. */ - static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); + cpumask_t tmpmask; int i; - for_each_cpu(i, cpu_map) + for_each_cpu_mask_nr(i, *cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); synchronize_sched(); - arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); + arch_destroy_sched_domains(cpu_map, &tmpmask); } /* handle null as "default" */ @@ -7888,7 +7773,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * doms_new[] to the current sched domain partitioning, doms_cur[]. * It destroys each deleted domain and builds each new domain. * - * 'doms_new' is an array of cpumask's of length 'ndoms_new'. + * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. * The masks don't intersect (don't overlap.) We should set up one * sched domain for each mask. CPUs not in any of the cpumasks will * not be load balanced. If the same cpumask appears both in the @@ -7902,14 +7787,13 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * the single partition 'fallback_doms', it also forces the domains * to be rebuilt. 
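 *
 * A minimal caller sketch (hypothetical two-partition layout, with
 * the hotplug lock held as required below):
 *
 *	cpumask_t doms[2];
 *
 *	cpus_clear(doms[0]);
 *	cpu_set(0, doms[0]);
 *	cpu_set(1, doms[0]);
 *	cpus_clear(doms[1]);
 *	cpu_set(2, doms[1]);
 *	cpu_set(3, doms[1]);
 *	partition_sched_domains(2, doms, NULL);
 *
 * Entries that match a current domain (same mask and attributes) are
 * kept untouched; stale domains are detached and destroyed, and only
 * the genuinely new entries are built.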
* - * If doms_new == NULL it will be replaced with cpu_online_mask. + * If doms_new == NULL it will be replaced with cpu_online_map. * ndoms_new == 0 is a special case for destroying existing domains, * and it will not create the default domain. * * Call with hotplug lock held */ -/* FIXME: Change to struct cpumask *doms_new[] */ -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { int i, j, n; @@ -7928,7 +7812,7 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(&doms_cur[i], &doms_new[j]) + if (cpus_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } @@ -7940,15 +7824,15 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, if (doms_new == NULL) { ndoms_cur = 0; - doms_new = fallback_doms; - cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); + doms_new = &fallback_doms; + cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); WARN_ON_ONCE(dattr_new); } /* Build new domains */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < ndoms_cur && !new_topology; j++) { - if (cpumask_equal(&doms_new[i], &doms_cur[j]) + if (cpus_equal(doms_new[i], doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } @@ -7960,7 +7844,7 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, } /* Remember the new sched domains */ - if (doms_cur != fallback_doms) + if (doms_cur != &fallback_doms) kfree(doms_cur); kfree(dattr_cur); /* kfree(NULL) is safe */ doms_cur = doms_new; @@ -7989,25 +7873,14 @@ int arch_reinit_sched_domains(void) static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) { int ret; - unsigned int level = 0; - if (sscanf(buf, "%u", &level) != 1) - return -EINVAL; - - /* - * level is always be positive so don't check for - * level < POWERSAVINGS_BALANCE_NONE which is 0 - * What happens on 0 or 1 byte write, - * need to check for count as well? 
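partition_sched_domains(), shown above, only tears down domains whose mask no longer appears in doms_new and only builds masks that were not already present in doms_cur (the match1/match2 labels). Below is a hedged sketch of that diffing logic, with cpus_equal() reduced to a word compare and the dattrs_equal() attribute check omitted; the printf calls stand in for the real detach/build work.

#include <stdbool.h>
#include <stdio.h>

typedef struct { unsigned long bits; } cpumask_t;

static bool cpus_equal(cpumask_t a, cpumask_t b) { return a.bits == b.bits; }

static void repartition(const cpumask_t *doms_cur, int ndoms_cur,
                        const cpumask_t *doms_new, int ndoms_new)
{
        /* destroy domains that no longer appear in doms_new */
        for (int i = 0; i < ndoms_cur; i++) {
                bool kept = false;
                for (int j = 0; j < ndoms_new; j++)
                        if (cpus_equal(doms_cur[i], doms_new[j])) { kept = true; break; }
                if (!kept)
                        printf("detach_destroy_domains(%#lx)\n", doms_cur[i].bits);
        }
        /* build domains that were not already present in doms_cur */
        for (int i = 0; i < ndoms_new; i++) {
                bool reused = false;
                for (int j = 0; j < ndoms_cur; j++)
                        if (cpus_equal(doms_new[i], doms_cur[j])) { reused = true; break; }
                if (!reused)
                        printf("__build_sched_domains(%#lx)\n", doms_new[i].bits);
        }
}

int main(void)
{
        cpumask_t cur[]  = { { 0x0f }, { 0xf0 } };
        cpumask_t next[] = { { 0x0f }, { 0xff } };

        repartition(cur, 2, next, 2);   /* keeps 0x0f, tears down 0xf0, builds 0xff */
        return 0;
}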
- */ - - if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS) + if (buf[0] != '0' && buf[0] != '1') return -EINVAL; if (smt) - sched_smt_power_savings = level; + sched_smt_power_savings = (buf[0] == '1'); else - sched_mc_power_savings = level; + sched_mc_power_savings = (buf[0] == '1'); ret = arch_reinit_sched_domains(); @@ -8111,9 +7984,7 @@ static int update_runtime(struct notifier_block *nfb, void __init sched_init_smp(void) { - cpumask_var_t non_isolated_cpus; - - alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); + cpumask_t non_isolated_cpus; #if defined(CONFIG_NUMA) sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), @@ -8122,10 +7993,10 @@ void __init sched_init_smp(void) #endif get_online_cpus(); mutex_lock(&sched_domains_mutex); - arch_init_sched_domains(cpu_online_mask); - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - if (cpumask_empty(non_isolated_cpus)) - cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); + arch_init_sched_domains(&cpu_online_map); + cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); + if (cpus_empty(non_isolated_cpus)) + cpu_set(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); put_online_cpus(); @@ -8140,13 +8011,9 @@ void __init sched_init_smp(void) init_hrtick(); /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) + if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) BUG(); sched_init_granularity(); - free_cpumask_var(non_isolated_cpus); - - alloc_cpumask_var(&fallback_doms, GFP_KERNEL); - init_sched_rt_class(); } #else void __init sched_init_smp(void) @@ -8461,15 +8328,6 @@ void __init sched_init(void) */ current->sched_class = &fair_sched_class; - /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - alloc_bootmem_cpumask_var(&nohz_cpu_mask); -#ifdef CONFIG_SMP -#ifdef CONFIG_NO_HZ - alloc_bootmem_cpumask_var(&nohz.cpu_mask); -#endif - alloc_bootmem_cpumask_var(&cpu_isolated_map); -#endif /* SMP */ - scheduler_running = 1; } diff --git a/trunk/kernel/sched_cpupri.c b/trunk/kernel/sched_cpupri.c index 018b7be1db2e..52154fefab7e 100644 --- a/trunk/kernel/sched_cpupri.c +++ b/trunk/kernel/sched_cpupri.c @@ -67,21 +67,24 @@ static int convert_prio(int prio) * Returns: (int)bool - CPUs were found */ int cpupri_find(struct cpupri *cp, struct task_struct *p, - struct cpumask *lowest_mask) + cpumask_t *lowest_mask) { int idx = 0; int task_pri = convert_prio(p->prio); for_each_cpupri_active(cp->pri_active, idx) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; + cpumask_t mask; if (idx >= task_pri) break; - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) + cpus_and(mask, p->cpus_allowed, vec->mask); + + if (cpus_empty(mask)) continue; - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); + *lowest_mask = mask; return 1; } @@ -123,7 +126,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) vec->count--; if (!vec->count) clear_bit(oldpri, cp->pri_active); - cpumask_clear_cpu(cpu, vec->mask); + cpu_clear(cpu, vec->mask); spin_unlock_irqrestore(&vec->lock, flags); } @@ -133,7 +136,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) spin_lock_irqsave(&vec->lock, flags); - cpumask_set_cpu(cpu, vec->mask); + cpu_set(cpu, vec->mask); vec->count++; if (vec->count == 1) set_bit(newpri, cp->pri_active); @@ -147,11 +150,10 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) /** * cpupri_init - initialize the cpupri structure * @cp: The cpupri context - * @bootmem: true if allocations need 
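The sched_power_savings_store() change above replaces the sscanf()-based level parser (any value below MAX_POWERSAVINGS_BALANCE_LEVELS) with a check that accepts only the literal characters '0' and '1'. The userspace sketch below contrasts the two behaviours; the constant's value and the plain -1 error return are illustrative, not the kernel's.

#include <stdio.h>

#define MAX_POWERSAVINGS_BALANCE_LEVELS 3       /* illustrative value */

static int parse_level(const char *buf, unsigned int *level)    /* old behaviour */
{
        if (sscanf(buf, "%u", level) != 1)
                return -1;                       /* -EINVAL in the kernel */
        if (*level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
                return -1;
        return 0;
}

static int parse_bool(const char *buf, unsigned int *level)     /* new behaviour */
{
        if (buf[0] != '0' && buf[0] != '1')
                return -1;
        *level = (buf[0] == '1');
        return 0;
}

int main(void)
{
        unsigned int v;

        printf("\"2\" as level: %d\n", parse_level("2", &v));   /* accepted: 0  */
        printf("\"2\" as bool:  %d\n", parse_bool("2", &v));    /* rejected: -1 */
        return 0;
}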
to use bootmem * - * Returns: -ENOMEM if memory fails. + * Returns: (void) */ -int cpupri_init(struct cpupri *cp, bool bootmem) +void cpupri_init(struct cpupri *cp) { int i; @@ -162,30 +164,11 @@ int cpupri_init(struct cpupri *cp, bool bootmem) spin_lock_init(&vec->lock); vec->count = 0; - if (bootmem) - alloc_bootmem_cpumask_var(&vec->mask); - else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) - goto cleanup; + cpus_clear(vec->mask); } for_each_possible_cpu(i) cp->cpu_to_pri[i] = CPUPRI_INVALID; - return 0; - -cleanup: - for (i--; i >= 0; i--) - free_cpumask_var(cp->pri_to_cpu[i].mask); - return -ENOMEM; } -/** - * cpupri_cleanup - clean up the cpupri structure - * @cp: The cpupri context - */ -void cpupri_cleanup(struct cpupri *cp) -{ - int i; - for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) - free_cpumask_var(cp->pri_to_cpu[i].mask); -} diff --git a/trunk/kernel/sched_cpupri.h b/trunk/kernel/sched_cpupri.h index 642a94ef8a0a..f25811b0f931 100644 --- a/trunk/kernel/sched_cpupri.h +++ b/trunk/kernel/sched_cpupri.h @@ -14,7 +14,7 @@ struct cpupri_vec { spinlock_t lock; int count; - cpumask_var_t mask; + cpumask_t mask; }; struct cpupri { @@ -27,8 +27,7 @@ struct cpupri { int cpupri_find(struct cpupri *cp, struct task_struct *p, cpumask_t *lowest_mask); void cpupri_set(struct cpupri *cp, int cpu, int pri); -int cpupri_init(struct cpupri *cp, bool bootmem); -void cpupri_cleanup(struct cpupri *cp); +void cpupri_init(struct cpupri *cp); #else #define cpupri_set(cp, cpu, pri) do { } while (0) #define cpupri_init() do { } while (0) diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index 56c0efe902a7..5ad4440f0fc4 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -1019,33 +1019,16 @@ static void yield_task_fair(struct rq *rq) * search starts with cpus closest then further out as needed, * so we always favor a closer, idle cpu. * Domains may include CPUs that are not usable for migration, - * hence we need to mask them out (cpu_active_mask) + * hence we need to mask them out (cpu_active_map) * * Returns the CPU we should wake onto. */ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) static int wake_idle(int cpu, struct task_struct *p) { + cpumask_t tmp; struct sched_domain *sd; int i; - unsigned int chosen_wakeup_cpu; - int this_cpu; - - /* - * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu - * are idle and this is not a kernel thread and this task's affinity - * allows it to be moved to preferred cpu, then just move! - */ - - this_cpu = smp_processor_id(); - chosen_wakeup_cpu = - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; - - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && - idle_cpu(cpu) && idle_cpu(this_cpu) && - p->mm && !(p->flags & PF_KTHREAD) && - cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) - return chosen_wakeup_cpu; /* * If it is idle, then it is the best cpu to run this task. 
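With vec->mask embedded as a plain cpumask_t, cpupri_init() above no longer contains an allocation that can fail, so the bootmem flag, the cleanup unwinding and cpupri_cleanup() all disappear and the function returns void. A minimal model of that simplification follows; the priority count and the stripped-down types are assumptions, not the kernel definitions.

#include <stdlib.h>

#define CPUPRI_NR_PRIORITIES 102                /* illustrative value */
typedef struct { unsigned long bits; } cpumask_t;

struct cpupri_vec {
        int       count;
        cpumask_t mask;                         /* embedded: clearing it is enough */
};

struct cpupri {
        struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
};

static void cpupri_init(struct cpupri *cp)      /* cannot fail, hence void */
{
        for (int i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
                cp->pri_to_cpu[i].count = 0;
                cp->pri_to_cpu[i].mask.bits = 0; /* cpus_clear(vec->mask) */
        }
}

int main(void)
{
        struct cpupri *cp = malloc(sizeof(*cp));

        if (!cp)
                return 1;
        cpupri_init(cp);                        /* no -ENOMEM, no cleanup pairing needed */
        free(cp);
        return 0;
}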
@@ -1063,9 +1046,10 @@ static int wake_idle(int cpu, struct task_struct *p) if ((sd->flags & SD_WAKE_IDLE) || ((sd->flags & SD_WAKE_IDLE_FAR) && !task_hot(p, task_rq(p)->clock, sd))) { - for_each_cpu_and(i, sched_domain_span(sd), - &p->cpus_allowed) { - if (cpu_active(i) && idle_cpu(i)) { + cpus_and(tmp, sd->span, p->cpus_allowed); + cpus_and(tmp, tmp, cpu_active_map); + for_each_cpu_mask_nr(i, tmp) { + if (idle_cpu(i)) { if (i != task_cpu(p)) { schedstat_inc(p, se.nr_wakeups_idle); @@ -1258,13 +1242,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync) * this_cpu and prev_cpu are present in: */ for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { + if (cpu_isset(prev_cpu, sd->span)) { this_sd = sd; break; } } - if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) + if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) goto out; /* diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c index 833b6d44483c..51d2af3e6191 100644 --- a/trunk/kernel/sched_rt.c +++ b/trunk/kernel/sched_rt.c @@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq) if (!rq->online) return; - cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); + cpu_set(rq->cpu, rq->rd->rto_mask); /* * Make sure the mask is visible before we set * the overload count. That is checked to determine @@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq) /* the order here really doesn't matter */ atomic_dec(&rq->rd->rto_count); - cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); + cpu_clear(rq->cpu, rq->rd->rto_mask); } static void update_rt_migration(struct rq *rq) @@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) } #ifdef CONFIG_SMP -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { return cpu_rq(smp_processor_id())->rd->span; } #else -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { - return cpu_online_mask; + return cpu_online_map; } #endif @@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq) return rt_rq->rt_throttled; } -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { - return cpu_online_mask; + return cpu_online_map; } static inline @@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq) int i, weight, more = 0; u64 rt_period; - weight = cpumask_weight(rd->span); + weight = cpus_weight(rd->span); spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu(i, rd->span) { + for_each_cpu_mask_nr(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq) /* * Greedy reclaim, take back as much as we can. 
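In wake_idle() above, the reverted code intersects the domain span, the task's affinity and cpu_active_map into a stack cpumask_t and then walks the result, instead of filtering with cpu_active() inside a for_each_cpu_and() loop. The sketch below mimics that shape with a mocked idle_cpu() and an assumed 8-CPU mask; none of it is the real scheduler code.

#include <stdio.h>

#define NR_CPUS 8
typedef struct { unsigned long bits; } cpumask_t;

static int idle_cpu(int cpu) { return cpu == 5 || cpu == 6; }   /* mock */

static int wake_idle_like(cpumask_t span, cpumask_t allowed, cpumask_t active, int prev)
{
        cpumask_t tmp;                          /* NR_CPUS bits on the stack */

        tmp.bits = span.bits & allowed.bits & active.bits;
        for (int i = 0; i < NR_CPUS; i++) {     /* for_each_cpu_mask_nr(i, tmp) */
                if (!(tmp.bits & (1UL << i)))
                        continue;
                if (idle_cpu(i) && i != prev)
                        return i;               /* first idle, allowed, active CPU */
        }
        return prev;                            /* nothing better than where we were */
}

int main(void)
{
        cpumask_t span = { 0xff }, allowed = { 0x7f }, active = { 0xff };

        printf("wake on CPU %d\n", wake_idle_like(span, allowed, active, 2)); /* 5 */
        return 0;
}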
*/ - for_each_cpu(i, rd->span) { + for_each_cpu_mask(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq) static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) { int i, idle = 1; - const struct cpumask *span; + cpumask_t span; if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return 1; span = sched_rt_period_mask(); - for_each_cpu(i, span) { + for_each_cpu_mask(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); @@ -805,20 +805,17 @@ static int select_task_rq_rt(struct task_struct *p, int sync) static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - cpumask_var_t mask; + cpumask_t mask; if (rq->curr->rt.nr_cpus_allowed == 1) return; - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) - return; - if (p->rt.nr_cpus_allowed != 1 - && cpupri_find(&rq->rd->cpupri, p, mask)) - goto free; + && cpupri_find(&rq->rd->cpupri, p, &mask)) + return; - if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) - goto free; + if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) + return; /* * There appears to be other cpus that can accept @@ -827,8 +824,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) */ requeue_task_rt(rq, p, 1); resched_task(rq->curr); -free: - free_cpumask_var(mask); } #endif /* CONFIG_SMP */ @@ -919,7 +914,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && + (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && (p->rt.nr_cpus_allowed > 1)) return 1; return 0; @@ -958,7 +953,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) return next; } -static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); +static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) { @@ -978,7 +973,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); @@ -993,7 +988,7 @@ static int find_lowest_rq(struct task_struct *task) * I guess we might want to change cpupri_find() to ignore those * in the first place. */ - cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); + cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); /* * At this point we have built a mask of cpus representing the @@ -1003,7 +998,7 @@ static int find_lowest_rq(struct task_struct *task) * We prioritize the last cpu that the task executed on since * it is most likely cache-hot in that location. */ - if (cpumask_test_cpu(cpu, lowest_mask)) + if (cpu_isset(cpu, *lowest_mask)) return cpu; /* @@ -1018,8 +1013,7 @@ static int find_lowest_rq(struct task_struct *task) cpumask_t domain_mask; int best_cpu; - cpumask_and(&domain_mask, sched_domain_span(sd), - lowest_mask); + cpus_and(domain_mask, sd->span, *lowest_mask); best_cpu = pick_optimal_cpu(this_cpu, &domain_mask); @@ -1060,8 +1054,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. 
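sched_rt_period_mask() in this hunk now returns a cpumask_t by value rather than a 'const struct cpumask *', so every caller copies the full NR_CPUS-bit object. The sketch below (assuming NR_CPUS = 4096) only makes that copy cost visible; the names are stand-ins for rq->rd->span and the accessor, not kernel code.

#include <stdio.h>

#define NR_CPUS 4096
typedef struct { unsigned long bits[NR_CPUS / 64]; } cpumask_t;

static cpumask_t rd_span;                        /* stand-in for rq->rd->span */

static cpumask_t period_mask_by_value(void)      /* reverted style: copies 512 bytes */
{
        return rd_span;
}

static const cpumask_t *period_mask_by_ref(void) /* pointer style: copies 8 bytes */
{
        return &rd_span;
}

int main(void)
{
        cpumask_t copy = period_mask_by_value();
        const cpumask_t *ref = period_mask_by_ref();

        printf("copied object: %zu bytes, pointer: %zu bytes, first word %#lx\n",
               sizeof(copy), sizeof(ref), copy.bits[0] | ref->bits[0]);
        return 0;
}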
*/ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, - &task->cpus_allowed) || + !cpu_isset(lowest_rq->cpu, + task->cpus_allowed) || task_running(rq, task) || !task->se.on_rq)) { @@ -1182,7 +1176,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu(cpu, this_rq->rd->rto_mask) { + for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; @@ -1311,9 +1305,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, } static void set_cpus_allowed_rt(struct task_struct *p, - const struct cpumask *new_mask) + const cpumask_t *new_mask) { - int weight = cpumask_weight(new_mask); + int weight = cpus_weight(*new_mask); BUG_ON(!rt_task(p)); @@ -1334,7 +1328,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, update_rt_migration(rq); } - cpumask_copy(&p->cpus_allowed, new_mask); + p->cpus_allowed = *new_mask; p->rt.nr_cpus_allowed = weight; } @@ -1377,14 +1371,6 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p, if (!rq->rt.rt_nr_running) pull_rt_task(rq); } - -static inline void init_sched_rt_class(void) -{ - unsigned int i; - - for_each_possible_cpu(i) - alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL); -} #endif /* CONFIG_SMP */ /* @@ -1555,4 +1541,3 @@ static void print_rt_stats(struct seq_file *m, int cpu) rcu_read_unlock(); } #endif /* CONFIG_SCHED_DEBUG */ - diff --git a/trunk/kernel/sched_stats.h b/trunk/kernel/sched_stats.h index f2773b5d1226..b59fd9cdc1b6 100644 --- a/trunk/kernel/sched_stats.h +++ b/trunk/kernel/sched_stats.h @@ -42,8 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) for_each_domain(cpu, sd) { enum cpu_idle_type itype; - cpumask_scnprintf(mask_str, mask_len, - sched_domain_span(sd)); + cpumask_scnprintf(mask_str, mask_len, &sd->span); seq_printf(seq, "domain%d %s", dcount++, mask_str); for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; itype++) { diff --git a/trunk/kernel/time/tick-sched.c b/trunk/kernel/time/tick-sched.c index 76a574bbef97..8f3fc2582d38 100644 --- a/trunk/kernel/time/tick-sched.c +++ b/trunk/kernel/time/tick-sched.c @@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void) if (!ts->tick_stopped) return; - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); now = ktime_get(); ts->idle_waketime = now; @@ -301,7 +301,7 @@ void tick_nohz_stop_sched_tick(int inidle) tick_do_timer_cpu = TICK_DO_TIMER_NONE; if (delta_jiffies > 1) - cpumask_set_cpu(cpu, nohz_cpu_mask); + cpu_set(cpu, nohz_cpu_mask); /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) @@ -319,7 +319,7 @@ void tick_nohz_stop_sched_tick(int inidle) /* * sched tick not stopped! */ - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); goto out; } @@ -361,7 +361,7 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. */ tick_do_update_jiffies64(ktime_get()); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: @@ -439,7 +439,7 @@ void tick_nohz_restart_sched_tick(void) select_nohz_load_balancer(0); now = ktime_get(); tick_do_update_jiffies64(now); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); /* * We stopped the tick in idle. Update process times would miss the
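The tick-sched.c hunks above revert nohz_cpu_mask maintenance to cpu_set()/cpu_clear(): a CPU sets its bit when it stops the periodic tick and clears it whenever the tick runs again, so the rest of the system can see which CPUs are tickless. The sketch below models that bookkeeping only; the kernel helpers operate on cpumask_t lvalues directly, whereas these stand-ins take a pointer, and the function names are simplified.

#include <stdio.h>

typedef struct { unsigned long bits; } cpumask_t;

static cpumask_t nohz_cpu_mask;                  /* global, like the kernel's */

static void cpu_set_bit(int cpu, cpumask_t *m)   { m->bits |=  (1UL << cpu); }
static void cpu_clear_bit(int cpu, cpumask_t *m) { m->bits &= ~(1UL << cpu); }

static void stop_sched_tick(int cpu, int delta_jiffies)   /* tick_nohz_stop_sched_tick() */
{
        if (delta_jiffies > 1)
                cpu_set_bit(cpu, &nohz_cpu_mask);          /* advertise: no tick here */
}

static void restart_sched_tick(int cpu)                    /* tick_nohz_restart_sched_tick() */
{
        cpu_clear_bit(cpu, &nohz_cpu_mask);                /* ticking again */
}

int main(void)
{
        stop_sched_tick(3, 10);
        printf("after stop:    %#lx\n", nohz_cpu_mask.bits);   /* 0x8 */
        restart_sched_tick(3);
        printf("after restart: %#lx\n", nohz_cpu_mask.bits);   /* 0x0 */
        return 0;
}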