x86/smpboot: Switch to hotplug core state synchronization
The new AP state tracking and synchronization mechanism in the CPU hotplug
core code makes it possible to remove a fair amount of x86-specific code:

  1) The AP alive synchronization based on cpumasks

  2) The decision whether an AP can be brought up again

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com> # Steam Deck
Link: https://lore.kernel.org/r/20230512205256.529657366@linutronix.de
Thomas Gleixner authored and Peter Zijlstra committed May 15, 2023
1 parent 6f06212 commit 2711b8e
Showing 6 changed files with 75 additions and 154 deletions.
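
For orientation, here is a hedged, heavily condensed sketch of the AP-side bringup after this change, assembled from the smpboot.c hunks below (not verbatim kernel code; error handling and unrelated setup are omitted):

/* Old scheme, removed below: x86-private cpumask handshake with the
 * control CPU. */
static void wait_for_master_cpu(int cpu)
{
	/* Announce this AP ... */
	WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
	/* ... and spin until the control CPU sets cpu_callout_mask. */
	while (!cpumask_test_cpu(cpu, cpu_callout_mask))
		cpu_relax();
}

/* New scheme: the generic hotplug core (HOTPLUG_CORE_SYNC_FULL, selected
 * in the Kconfig hunk below) tracks the AP state. The AP reports itself
 * ALIVE and waits for the control CPU to release it. */
static void notrace start_secondary(void *unused)
{
	cpu_init_exception_handling();
	cpuhp_ap_sync_alive();		/* replaces wait_for_master_cpu() */
	cpu_init();
	/* ... remainder of bringup unchanged ... */
}

The control-CPU side shrinks accordingly: native_cpu_up() now only kicks the AP, and waiting for it becomes the hotplug core's job.
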
1 change: 1 addition & 0 deletions arch/x86/Kconfig
@@ -274,6 +274,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
select HAVE_GENERIC_VDSO
select HOTPLUG_CORE_SYNC_FULL if SMP
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
select NEED_PER_CPU_EMBED_FIRST_CHUNK
7 changes: 4 additions & 3 deletions arch/x86/include/asm/smp.h
@@ -38,6 +38,8 @@ struct smp_ops {
void (*crash_stop_other_cpus)(void);
void (*smp_send_reschedule)(int cpu);

void (*cleanup_dead_cpu)(unsigned cpu);
void (*poll_sync_state)(void);
int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
@@ -90,7 +92,8 @@ static inline int __cpu_disable(void)

static inline void __cpu_die(unsigned int cpu)
{
smp_ops.cpu_die(cpu);
if (smp_ops.cpu_die)
smp_ops.cpu_die(cpu);
}

static inline void __noreturn play_dead(void)
@@ -123,8 +126,6 @@ void native_smp_cpus_done(unsigned int max_cpus);
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
int common_cpu_die(unsigned int cpu);
void native_cpu_die(unsigned int cpu);
void __noreturn hlt_play_dead(void);
void native_play_dead(void);
void play_dead_common(void);
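
The two new smp_ops members are optional and only invoked when set (see arch_cpuhp_cleanup_dead_cpu() and arch_cpuhp_sync_state_poll() in the smpboot.c hunks below). As a sketch of how a platform opts in, the my_* names below are placeholders for illustration, not part of this change:

#include <asm/smp.h>

/* Placeholder: per-CPU teardown once the hotplug core has observed the
 * CPU as dead; reached via arch_cpuhp_cleanup_dead_cpu(). */
static void my_cleanup_dead_cpu(unsigned int cpu)
{
	/* free IRQs, timers and other per-CPU resources for @cpu */
}

/* Placeholder: nudge the synchronization state along while the core
 * waits for the dying CPU; reached via arch_cpuhp_sync_state_poll(). */
static void my_poll_sync_state(void)
{
}

static void __init my_platform_smp_init(void)
{
	smp_ops.cleanup_dead_cpu = my_cleanup_dead_cpu;
	smp_ops.poll_sync_state  = my_poll_sync_state;
}

The Xen HVM hunk at the end of this diff installs cleanup_dead_cpu in exactly this way.
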
1 change: 0 additions & 1 deletion arch/x86/kernel/smp.c
@@ -269,7 +269,6 @@ struct smp_ops smp_ops = {
.smp_send_reschedule = native_smp_send_reschedule,

.cpu_up = native_cpu_up,
.cpu_die = native_cpu_die,
.cpu_disable = native_cpu_disable,
.play_dead = native_play_dead,

165 changes: 41 additions & 124 deletions arch/x86/kernel/smpboot.c
@@ -57,6 +57,7 @@
#include <linux/pgtable.h>
#include <linux/overflow.h>
#include <linux/stackprotector.h>
#include <linux/cpuhotplug.h>

#include <asm/acpi.h>
#include <asm/cacheinfo.h>
@@ -101,9 +102,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

/* All of these masks are initialized in setup_cpu_local_masks() */
static cpumask_var_t cpu_initialized_mask;
static cpumask_var_t cpu_callout_mask;
/* Representing CPUs for which sibling maps can be computed */
static cpumask_var_t cpu_sibling_setup_mask;

@@ -166,10 +164,10 @@ static void ap_starting(void)
int cpuid = smp_processor_id();

/*
* If woken up by an INIT in an 82489DX configuration
* cpu_callout_mask guarantees the CPU does not reach this point
* before an INIT_deassert IPI reaches the local APIC, so it is now
* safe to touch the local APIC.
* If woken up by an INIT in an 82489DX configuration the alive
* synchronization guarantees that the CPU does not reach this
* point before an INIT_deassert IPI reaches the local APIC, so it
* is now safe to touch the local APIC.
*
* Set up this CPU, first the APIC, which is probably redundant on
* most boards.
@@ -213,17 +211,6 @@ static void ap_calibrate_delay(void)
cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
}

static void wait_for_master_cpu(int cpu)
{
/*
* Wait for release by control CPU before continuing with AP
* initialization.
*/
WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
while (!cpumask_test_cpu(cpu, cpu_callout_mask))
cpu_relax();
}

/*
* Activate a secondary processor.
*/
@@ -244,11 +231,11 @@ static void notrace start_secondary(void *unused)
cpu_init_exception_handling();

/*
* Sync point with wait_cpu_initialized(). Sets AP in
* cpu_initialized_mask and then waits for the control CPU
* to release it.
* Synchronization point with the hotplug core. Sets the
* synchronization state to ALIVE and waits for the control CPU to
* release this CPU for further bringup.
*/
wait_for_master_cpu(raw_smp_processor_id());
cpuhp_ap_sync_alive();

cpu_init();
rcu_cpu_starting(raw_smp_processor_id());
@@ -278,7 +265,6 @@ static void notrace start_secondary(void *unused)
set_cpu_online(smp_processor_id(), true);
lapic_online();
unlock_vector_lock();
cpu_set_state_online(smp_processor_id());
x86_platform.nmi_init();

/* enable local interrupts */
@@ -729,9 +715,9 @@ static void impress_friends(void)
* Allow the user to impress friends.
*/
pr_debug("Before bogomips\n");
for_each_possible_cpu(cpu)
if (cpumask_test_cpu(cpu, cpu_callout_mask))
bogosum += cpu_data(cpu).loops_per_jiffy;
for_each_online_cpu(cpu)
bogosum += cpu_data(cpu).loops_per_jiffy;

pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
num_online_cpus(),
bogosum/(500000/HZ),
@@ -1003,6 +989,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
unsigned long start_ip = real_mode_header->trampoline_start;
int ret;

#ifdef CONFIG_X86_64
/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
@@ -1043,13 +1030,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
}
}

/*
* AP might wait on cpu_callout_mask in cpu_init() with
* cpu_initialized_mask set if previous attempt to online
* it timed-out. Clear cpu_initialized_mask so that after
* INIT/SIPI it could start with a clean state.
*/
cpumask_clear_cpu(cpu, cpu_initialized_mask);
smp_mb();

/*
@@ -1060,47 +1040,16 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
* - Use an INIT boot APIC message
*/
if (apic->wakeup_secondary_cpu_64)
return apic->wakeup_secondary_cpu_64(apicid, start_ip);
ret = apic->wakeup_secondary_cpu_64(apicid, start_ip);
else if (apic->wakeup_secondary_cpu)
return apic->wakeup_secondary_cpu(apicid, start_ip);

return wakeup_secondary_cpu_via_init(apicid, start_ip);
}

static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
{
unsigned long timeout;

/*
* Wait up to 10s for the CPU to report in.
*/
timeout = jiffies + 10*HZ;
while (time_before(jiffies, timeout)) {
if (cpumask_test_cpu(cpu, mask))
return 0;

schedule();
}
return -1;
}

/*
* Bringup step two: Wait for the target AP to reach cpu_init_secondary()
* and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
* to proceed. The AP will then proceed past setting its 'callin' bit
* and end up waiting in check_tsc_sync_target() until we reach
* wait_cpu_online() to tend to it.
*/
static int wait_cpu_initialized(unsigned int cpu)
{
/*
* Wait for first sign of life from AP.
*/
if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
return -1;
ret = apic->wakeup_secondary_cpu(apicid, start_ip);
else
ret = wakeup_secondary_cpu_via_init(apicid, start_ip);

cpumask_set_cpu(cpu, cpu_callout_mask);
return 0;
/* If the wakeup mechanism failed, cleanup the warm reset vector */
if (ret)
arch_cpuhp_cleanup_kick_cpu(cpu);
return ret;
}

static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
@@ -1125,11 +1074,6 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
*/
mtrr_save_state();

/* x86 CPUs take themselves offline, so delayed offline is OK. */
err = cpu_check_up_prepare(cpu);
if (err && err != -EBUSY)
return err;

/* the FPU context is blank, nobody can own it */
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;

@@ -1146,17 +1090,29 @@

int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int ret;

ret = native_kick_ap(cpu, tidle);
if (!ret)
ret = wait_cpu_initialized(cpu);
return native_kick_ap(cpu, tidle);
}

void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
{
/* Cleanup possible dangling ends... */
if (x86_platform.legacy.warm_reset)
if (smp_ops.cpu_up == native_cpu_up && x86_platform.legacy.warm_reset)
smpboot_restore_warm_reset_vector();
}

return ret;
void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
if (smp_ops.cleanup_dead_cpu)
smp_ops.cleanup_dead_cpu(cpu);

if (system_state == SYSTEM_RUNNING)
pr_info("CPU %u is now offline\n", cpu);
}

void arch_cpuhp_sync_state_poll(void)
{
if (smp_ops.poll_sync_state)
smp_ops.poll_sync_state();
}

/**
@@ -1348,9 +1304,6 @@ void __init native_smp_prepare_boot_cpu(void)
if (!IS_ENABLED(CONFIG_SMP))
switch_gdt_and_percpu_base(me);

/* already set me in cpu_online_mask in boot_cpu_init() */
cpumask_set_cpu(me, cpu_callout_mask);
cpu_set_state_online(me);
native_pv_lock_init();
}

@@ -1477,8 +1430,6 @@ __init void prefill_possible_map(void)
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
alloc_bootmem_cpumask_var(&cpu_initialized_mask);
alloc_bootmem_cpumask_var(&cpu_callout_mask);
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}

@@ -1540,9 +1491,6 @@ static void remove_siblinginfo(int cpu)
static void remove_cpu_from_maps(int cpu)
{
set_cpu_online(cpu, false);
cpumask_clear_cpu(cpu, cpu_callout_mask);
/* was set by cpu_init() */
cpumask_clear_cpu(cpu, cpu_initialized_mask);
numa_remove_cpu(cpu);
}

@@ -1593,36 +1541,11 @@ int native_cpu_disable(void)
return 0;
}

int common_cpu_die(unsigned int cpu)
{
int ret = 0;

/* We don't do anything here: idle task is faking death itself. */

/* They ack this in play_dead() by setting CPU_DEAD */
if (cpu_wait_death(cpu, 5)) {
if (system_state == SYSTEM_RUNNING)
pr_info("CPU %u is now offline\n", cpu);
} else {
pr_err("CPU %u didn't die...\n", cpu);
ret = -1;
}

return ret;
}

void native_cpu_die(unsigned int cpu)
{
common_cpu_die(cpu);
}

void play_dead_common(void)
{
idle_task_exit();

/* Ack it */
(void)cpu_report_death();

cpuhp_ap_report_dead();
/*
* With physical CPU hotplug, we should halt the cpu
*/
@@ -1724,12 +1647,6 @@ int native_cpu_disable(void)
return -ENOSYS;
}

void native_cpu_die(unsigned int cpu)
{
/* We said "no" in __cpu_disable */
BUG();
}

void native_play_dead(void)
{
BUG();
16 changes: 7 additions & 9 deletions arch/x86/xen/smp_hvm.c
@@ -55,18 +55,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
}

#ifdef CONFIG_HOTPLUG_CPU
static void xen_hvm_cpu_die(unsigned int cpu)
static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
{
if (common_cpu_die(cpu) == 0) {
if (xen_have_vector_callback) {
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
}
if (xen_have_vector_callback) {
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
}
}
#else
static void xen_hvm_cpu_die(unsigned int cpu)
static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
{
BUG();
}
@@ -77,7 +75,7 @@ void __init xen_hvm_smp_init(void)
smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
smp_ops.smp_cpus_done = xen_smp_cpus_done;
smp_ops.cpu_die = xen_hvm_cpu_die;
smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu;

if (!xen_have_vector_callback) {
#ifdef CONFIG_PARAVIRT_SPINLOCKS
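
With common_cpu_die() gone, xen_hvm_cleanup_dead_cpu() no longer decides whether the CPU actually died; the hotplug core does. A hedged sketch of the resulting control-CPU call path, condensed from the hunks above:

/* The dying AP announces itself via cpuhp_ap_report_dead() in
 * play_dead_common(). Once the hotplug core has observed the CPU as
 * dead, the architecture hook runs the optional cleanup: */
void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
	if (smp_ops.cleanup_dead_cpu)	/* xen_hvm_cleanup_dead_cpu() on Xen HVM */
		smp_ops.cleanup_dead_cpu(cpu);

	if (system_state == SYSTEM_RUNNING)
		pr_info("CPU %u is now offline\n", cpu);
}
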