[PATCH] cpu state clean after hot remove
Clean CPU states in order to reuse smp boot code for CPU hotplug.

Signed-off-by: Li Shaohua <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Li Shaohua authored and Linus Torvalds committed Jun 25, 2005
1 parent 0bb3184 commit e1367da
Showing 7 changed files with 187 additions and 43 deletions.
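Taken together, the hunks below turn a CPU offline/online cycle into a clean state teardown followed by a warm re-boot of the AP through the regular SMP boot path. In outline (a sketch inferred from the diff below; simplified, and not code from the patch itself):

/*
 * Sketch of the lifecycle this patch enables. Inferred from the hunks
 * below; simplified, not code from the patch itself.
 */

/* Offline path: runs on the dying CPU, after __cpu_disable(). */
static void offline_sketch(void)
{
	cpu_exit_clear();			/* scrub per-cpu state, clear cpu maps */
	__get_cpu_var(cpu_state) = CPU_DEAD;	/* the ack that __cpu_die() polls for */
	for (;;)
		__asm__ __volatile__("hlt":::"memory");
}

/* Online path: runs on a live CPU, from the sysfs "online" store method. */
static int online_sketch(int cpu)
{
	int ret = smp_prepare_cpu(cpu);		/* warm-boot the AP via do_boot_cpu() */

	if (ret == 0)
		ret = cpu_up(cpu);		/* set CPU_UP_PREPARE, wait for online */
	return ret;
}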
12 changes: 12 additions & 0 deletions arch/i386/kernel/cpu/common.c
@@ -651,3 +651,15 @@ void __devinit cpu_init(void)
clear_used_math();
mxcsr_feature_mask_init();
}

#ifdef CONFIG_HOTPLUG_CPU
void __devinit cpu_uninit(void)
{
int cpu = raw_smp_processor_id();
cpu_clear(cpu, cpu_initialized);

/* lazy TLB state */
per_cpu(cpu_tlbstate, cpu).state = 0;
per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
}
#endif
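cpu_uninit() exists to unwind the one-time latch at the top of cpu_init(), so a re-plugged CPU runs full initialization again instead of tripping the "already initialized" check. For reference, the latch looks roughly like this (pre-existing context in the same file, quoted approximately, not part of this diff):

/* Approximate shape of the cpu_init() latch that cpu_uninit() re-arms
 * (pre-existing context in common.c, not part of this diff). */
void __devinit cpu_init(void)
{
	int cpu = smp_processor_id();

	if (cpu_test_and_set(cpu, cpu_initialized)) {
		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
		for (;;)
			local_irq_enable();
	}
	/* ... full per-cpu GDT/TSS/FPU setup follows ... */
}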
5 changes: 5 additions & 0 deletions arch/i386/kernel/irq.c
@@ -156,6 +156,11 @@ void irq_ctx_init(int cpu)
cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
}

void irq_ctx_exit(int cpu)
{
hardirq_ctx[cpu] = NULL;
}

extern asmlinkage void __do_softirq(void);

asmlinkage void do_softirq(void)
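This pairs with the guard at the top of irq_ctx_init(): the function returns early while hardirq_ctx[cpu] is non-NULL, so clearing it in irq_ctx_exit() is what lets a re-plugged CPU rebuild its 4K-stacks IRQ contexts. Roughly (approximate pre-existing context, not part of this diff):

/* Approximate shape of the irq_ctx_init() guard that irq_ctx_exit()
 * re-arms (pre-existing context, not part of this diff). */
void irq_ctx_init(int cpu)
{
	if (hardirq_ctx[cpu])
		return;	/* stacks already set up; see irq_ctx_exit() */

	/* ... carve hardirq_ctx[cpu] and softirq_ctx[cpu] out of per-cpu space ... */
}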
20 changes: 9 additions & 11 deletions arch/i386/kernel/process.c
@@ -152,21 +152,19 @@ static void poll_idle (void)
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
+	/* This must be done before dead CPU ack */
+	cpu_exit_clear();
+	wbinvd();
+	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

-	/* We shouldn't have to disable interrupts while dead, but
-	 * some interrupts just don't seem to go away, and this makes
-	 * it "work" for testing purposes. */
-	/* Death loop */
-	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
-		cpu_relax();
-
+	/*
+	 * With physical CPU hotplug, we should halt the cpu
+	 */
	local_irq_disable();
-	__flush_tlb_all();
-	cpu_set(smp_processor_id(), cpu_online_map);
-	enable_APIC_timer();
-	local_irq_enable();
+	while (1)
+		__asm__ __volatile__("hlt":::"memory");
}
#else
static inline void play_dead(void)
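The ordering in play_dead() is the point of the hunk: cpu_exit_clear() and the wbinvd() cache flush must complete before the CPU_DEAD store becomes visible, because the waiting processor treats that store as permission to consider the slot dead and reusable. For context, here is the waiting side, condensed from the __cpu_die() hunk in smpboot.c further down:

/* Condensed from the __cpu_die() hunk below (hypothetical name): poll
 * for the dying CPU's CPU_DEAD ack, roughly 100 ms per retry. */
void __cpu_die_sketch(unsigned int cpu)
{
	int i;

	for (i = 0; i < 10; i++) {
		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
			printk("CPU %d is now offline\n", cpu);
			return;
		}
		current->state = TASK_UNINTERRUPTIBLE;
		schedule_timeout(HZ/10);
	}
	/* the error path after ~1 s of polling is not visible in the hunk */
}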
175 changes: 144 additions & 31 deletions arch/i386/kernel/smpboot.c
@@ -90,6 +90,12 @@ cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
static cpumask_t smp_commenced_mask;

/* TSC's upper 32 bits can't be written on earlier CPUs (before Prescott), so
 * there is no way to resync one AP against the BP. TBD: for Prescott and
 * above, we should use IA64's algorithm
 */
static int __devinitdata tsc_sync_disabled;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);
@@ -427,7 +433,7 @@ static void __devinit smp_callin(void)
/*
* Synchronize the TSC with the BP
*/
-	if (cpu_has_tsc && cpu_khz)
+	if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
synchronize_tsc_ap();
}

@@ -507,6 +513,7 @@ static void __devinit start_secondary(void *unused)
lock_ipi_call_lock();
cpu_set(smp_processor_id(), cpu_online_map);
unlock_ipi_call_lock();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;

/* We can take interrupts now: we're officially "up". */
local_irq_enable();
@@ -816,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
#endif /* WAKE_SECONDARY_VIA_INIT */

extern cpumask_t cpu_initialized;
static inline int alloc_cpu_id(void)
{
cpumask_t tmp_map;
int cpu;
cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
if (cpu >= NR_CPUS)
return -ENODEV;
return cpu;
}

#ifdef CONFIG_HOTPLUG_CPU
static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
static inline struct task_struct * alloc_idle_task(int cpu)
{
struct task_struct *idle;

if ((idle = cpu_idle_tasks[cpu]) != NULL) {
		/* reinitialize thread_struct; we really want to avoid
		 * destroying the idle thread
		 */
idle->thread.esp = (unsigned long)(((struct pt_regs *)
(THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
init_idle(idle, cpu);
return idle;
}
idle = fork_idle(cpu);

if (!IS_ERR(idle))
cpu_idle_tasks[cpu] = idle;
return idle;
}
#else
#define alloc_idle_task(cpu) fork_idle(cpu)
#endif

-static int __devinit do_boot_cpu(int apicid)
+static int __devinit do_boot_cpu(int apicid, int cpu)
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 */
@@ -826,16 +868,17 @@ static int __devinit do_boot_cpu(int apicid)
{
struct task_struct *idle;
unsigned long boot_error;
-	int timeout, cpu;
+	int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;

-	cpu = ++cpucount;
+	++cpucount;

/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
*/
-	idle = fork_idle(cpu);
+	idle = alloc_idle_task(cpu);
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
idle->thread.eip = (unsigned long) start_secondary;
@@ -902,13 +945,16 @@ static int __devinit do_boot_cpu(int apicid)
inquire_remote_apic(apicid);
}
}
-	x86_cpu_to_apicid[cpu] = apicid;

if (boot_error) {
/* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu);
cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
cpucount--;
+	} else {
+		x86_cpu_to_apicid[cpu] = apicid;
+		cpu_set(cpu, cpu_present_map);
}

/* mark "stuck" area as not stuck */
@@ -917,6 +963,75 @@ static int __devinit do_boot_cpu(int apicid)
return boot_error;
}

#ifdef CONFIG_HOTPLUG_CPU
void cpu_exit_clear(void)
{
int cpu = raw_smp_processor_id();

idle_task_exit();

cpucount--;
cpu_uninit();
irq_ctx_exit(cpu);

cpu_clear(cpu, cpu_callout_map);
cpu_clear(cpu, cpu_callin_map);
cpu_clear(cpu, cpu_present_map);

cpu_clear(cpu, smp_commenced_mask);
unmap_cpu_to_logical_apicid(cpu);
}

struct warm_boot_cpu_info {
struct completion *complete;
int apicid;
int cpu;
};

static void __devinit do_warm_boot_cpu(void *p)
{
struct warm_boot_cpu_info *info = p;
do_boot_cpu(info->apicid, info->cpu);
complete(info->complete);
}

int __devinit smp_prepare_cpu(int cpu)
{
DECLARE_COMPLETION(done);
struct warm_boot_cpu_info info;
struct work_struct task;
int apicid, ret;

lock_cpu_hotplug();
apicid = x86_cpu_to_apicid[cpu];
if (apicid == BAD_APICID) {
ret = -ENODEV;
goto exit;
}

info.complete = &done;
info.apicid = apicid;
info.cpu = cpu;
INIT_WORK(&task, do_warm_boot_cpu, &info);

tsc_sync_disabled = 1;

/* init low mem mapping */
memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
flush_tlb_all();
schedule_work(&task);
wait_for_completion(&done);

tsc_sync_disabled = 0;
zap_low_mappings();
ret = 0;
exit:
unlock_cpu_hotplug();
return ret;
}
#endif

static void smp_tune_scheduling (void)
{
unsigned long cachesize; /* kB */
@@ -1069,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (max_cpus <= cpucount+1)
continue;

-		if (do_boot_cpu(apicid))
+		if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
else
@@ -1149,25 +1264,24 @@ void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callout_map);
cpu_set(smp_processor_id(), cpu_present_map);
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}

#ifdef CONFIG_HOTPLUG_CPU

-/* must be called with the cpucontrol mutex held */
-static int __devinit cpu_enable(unsigned int cpu)
+static void
+remove_siblinginfo(int cpu)
{
-	/* get the target out of its holding state */
-	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-	wmb();
-
-	/* wait for the processor to ack it. timeout? */
-	while (!cpu_online(cpu))
-		cpu_relax();
-
-	fixup_irqs(cpu_online_map);
-	/* counter the disable in fixup_irqs() */
-	local_irq_enable();
-	return 0;
+	int sibling;
+
+	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+		cpu_clear(cpu, cpu_sibling_map[sibling]);
+	for_each_cpu_mask(sibling, cpu_core_map[cpu])
+		cpu_clear(cpu, cpu_core_map[sibling]);
+	cpus_clear(cpu_sibling_map[cpu]);
+	cpus_clear(cpu_core_map[cpu]);
+	phys_proc_id[cpu] = BAD_APICID;
+	cpu_core_id[cpu] = BAD_APICID;
}

int __cpu_disable(void)
@@ -1193,6 +1307,8 @@ int __cpu_disable(void)
mdelay(1);
local_irq_disable();

remove_siblinginfo(cpu);

cpu_clear(cpu, map);
fixup_irqs(map);
/* It's now safe to remove this processor from the online map */
@@ -1207,8 +1323,10 @@ void __cpu_die(unsigned int cpu)

for (i = 0; i < 10; i++) {
/* They ack this in play_dead by setting CPU_DEAD */
-		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
			printk ("CPU %d is now offline\n", cpu);
+			return;
+		}
current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout(HZ/10);
}
@@ -1236,15 +1354,8 @@ int __devinit __cpu_up(unsigned int cpu)
return -EIO;
}

-#ifdef CONFIG_HOTPLUG_CPU
-	/* Already up, and in cpu_quiescent now? */
-	if (cpu_isset(cpu, smp_commenced_mask)) {
-		cpu_enable(cpu);
-		return 0;
-	}
-#endif
-
	local_irq_enable();
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
@@ -1258,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
setup_ioapic_dest();
#endif
zap_low_mappings();
#ifndef CONFIG_HOTPLUG_CPU
/*
* Disable executability of the SMP trampoline:
*/
set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
#endif
}

void __init smp_intr_init(void)
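One subtlety in smp_prepare_cpu() above: a warm-booted AP starts its trampoline in real mode, so the boot-time identity mapping of low memory has to be rebuilt around the boot and dropped again afterwards (which is also why smp_cpus_done() now keeps the trampoline executable under CONFIG_HOTPLUG_CPU). The shape of the trick, with a hypothetical wrapper; the memcpy matches the hunk:

/* Hypothetical wrapper showing the low-memory mapping dance that
 * smp_prepare_cpu() performs inline around the warm boot. */
static void with_low_mappings(void (*warm_boot)(void))
{
	/* mirror the kernel half of swapper_pg_dir into the user half,
	 * recreating the boot-time identity map of low memory */
	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
	       sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
	flush_tlb_all();

	warm_boot();		/* the real-mode trampoline can execute now */

	zap_low_mappings();	/* identity map no longer needed */
}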
8 changes: 7 additions & 1 deletion drivers/base/cpu.c
@@ -16,6 +16,10 @@ struct sysdev_class cpu_sysdev_class = {
EXPORT_SYMBOL(cpu_sysdev_class);

#ifdef CONFIG_HOTPLUG_CPU
#ifndef __HAVE_ARCH_SMP_PREPARE_CPU
#define smp_prepare_cpu(cpu) (0)
#endif

static ssize_t show_online(struct sys_device *dev, char *buf)
{
struct cpu *cpu = container_of(dev, struct cpu, sysdev);
@@ -36,7 +40,9 @@ static ssize_t store_online(struct sys_device *dev, const char *buf,
kobject_hotplug(&dev->kobj, KOBJ_OFFLINE);
break;
case '1':
-		ret = cpu_up(cpu->sysdev.id);
+		ret = smp_prepare_cpu(cpu->sysdev.id);
+		if (ret == 0)
+			ret = cpu_up(cpu->sysdev.id);
break;
default:
ret = -EINVAL;
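From userspace the whole cycle is driven through the sysfs attribute that store_online() implements. A minimal, hypothetical test program (assumes CONFIG_HOTPLUG_CPU and a hotpluggable cpu1):

/* Hypothetical userspace test: take cpu1 offline, then online again,
 * via the sysfs attribute handled by store_online() above. */
#include <stdio.h>

static int set_cpu_online(int cpu, int online)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputc(online ? '1' : '0', f);
	return fclose(f);
}

int main(void)
{
	if (set_cpu_online(1, 0))	/* triggers __cpu_disable() + play_dead() */
		perror("offline cpu1");
	if (set_cpu_online(1, 1))	/* triggers smp_prepare_cpu() + cpu_up() */
		perror("online cpu1");
	return 0;
}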
2 changes: 2 additions & 0 deletions include/asm-i386/irq.h
@@ -29,9 +29,11 @@ extern void release_vm86_irqs(struct task_struct *);

#ifdef CONFIG_4KSTACKS
extern void irq_ctx_init(int cpu);
extern void irq_ctx_exit(int cpu);
# define __ARCH_HAS_DO_SOFTIRQ
#else
# define irq_ctx_init(cpu) do { } while (0)
# define irq_ctx_exit(cpu) do { } while (0)
#endif

#ifdef CONFIG_IRQBALANCE
8 changes: 8 additions & 0 deletions include/asm-i386/smp.h
@@ -48,6 +48,14 @@ extern void unlock_ipi_call_lock(void);
#define MAX_APICID 256
extern u8 x86_cpu_to_apicid[];

#ifdef CONFIG_HOTPLUG_CPU
extern void cpu_exit_clear(void);
extern void cpu_uninit(void);

#define __HAVE_ARCH_SMP_PREPARE_CPU
extern int smp_prepare_cpu(int cpu);
#endif

/*
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup. We map APIC_BASE very early in page_setup(),
