[PATCH] x86: rewrite SMP TSC sync code
Make the TSC synchronization code more robust, and unify it between x86_64 and
i386.

The biggest change is the removal of the 'fix up TSCs' code on x86_64 and
i386; in some rare cases it was /causing/ time-warps on SMP systems.

The new code only checks for TSC asynchronicity: if it can prove a time-warp
(i.e. if it observes the TSC going backwards when going from one CPU to
another within a critical section), the TSC clock-source is turned off.

The TSC synchronization-checking code also got moved into a separate file.
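
In outline, both CPUs run a tight loop that reads the TSC inside a
lock-protected critical section and records the last value seen; a read that
comes out lower than a value already recorded (possibly by the other CPU) is
proof of a warp. Below is a minimal kernel-flavored sketch of that idea - the
new tsc_sync.c itself is not among the diffs shown on this page, so the helper
names and the ~20 msec probing duration here are assumptions, not the literal
implementation:

    static DEFINE_SPINLOCK(sync_lock);
    static cycles_t last_tsc;
    static int nr_warps;

    static void check_tsc_warp(void)
    {
            cycles_t start = get_cycles(), now, prev;

            /* both CPUs probe the TSC for roughly 20 msecs: */
            while (get_cycles() - start < 20ULL * cpu_khz) {
                    spin_lock(&sync_lock);
                    prev = last_tsc;    /* last value seen, possibly on the other CPU */
                    now = get_cycles();
                    last_tsc = now;
                    if (unlikely(prev > now))   /* TSC went backwards: a time-warp */
                            nr_warps++;
                    spin_unlock(&sync_lock);
            }
    }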

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Ingo Molnar authored and Linus Torvalds committed Feb 16, 2007
1 parent 92c7e00 commit 95492e4
Showing 12 changed files with 295 additions and 463 deletions.
2 changes: 1 addition & 1 deletion arch/i386/kernel/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_MSR)      += msr.o
 obj-$(CONFIG_X86_CPUID)        += cpuid.o
 obj-$(CONFIG_MICROCODE)        += microcode.o
 obj-$(CONFIG_APM)              += apm.o
-obj-$(CONFIG_X86_SMP)          += smp.o smpboot.o
+obj-$(CONFIG_X86_SMP)          += smp.o smpboot.o tsc_sync.o
 obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline.o
 obj-$(CONFIG_X86_MPPARSE)      += mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o nmi.o
178 changes: 12 additions & 166 deletions arch/i386/kernel/smpboot.c
@@ -94,12 +94,6 @@ cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
-/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
- * is no way to resync one AP against BP. TBD: for prescott and above, we
- * should use IA64's algorithm
- */
-static int __devinitdata tsc_sync_disabled;
-
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
@@ -216,151 +210,6 @@ static void __cpuinit smp_store_cpu_info(int id)
                ;
 }
 
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static struct {
-       atomic_t start_flag;
-       atomic_t count_start;
-       atomic_t count_stop;
-       unsigned long long values[NR_CPUS];
-} tsc __cpuinitdata = {
-       .start_flag = ATOMIC_INIT(0),
-       .count_start = ATOMIC_INIT(0),
-       .count_stop = ATOMIC_INIT(0),
-};
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp(void)
-{
-       int i;
-       unsigned long long t0;
-       unsigned long long sum, avg;
-       long long delta;
-       unsigned int one_usec;
-       int buggy = 0;
-
-       printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
-       /* convert from kcyc/sec to cyc/usec */
-       one_usec = cpu_khz / 1000;
-
-       atomic_set(&tsc.start_flag, 1);
-       wmb();
-
-       /*
-        * We loop a few times to get a primed instruction cache,
-        * then the last pass is more or less synchronized and
-        * the BP and APs set their cycle counters to zero all at
-        * once. This reduces the chance of having random offsets
-        * between the processors, and guarantees that the maximum
-        * delay between the cycle counters is never bigger than
-        * the latency of information-passing (cachelines) between
-        * two CPUs.
-        */
-       for (i = 0; i < NR_LOOPS; i++) {
-               /*
-                * all APs synchronize but they loop on '== num_cpus'
-                */
-               while (atomic_read(&tsc.count_start) != num_booting_cpus()-1)
-                       cpu_relax();
-               atomic_set(&tsc.count_stop, 0);
-               wmb();
-               /*
-                * this lets the APs save their current TSC:
-                */
-               atomic_inc(&tsc.count_start);
-
-               rdtscll(tsc.values[smp_processor_id()]);
-               /*
-                * We clear the TSC in the last loop:
-                */
-               if (i == NR_LOOPS-1)
-                       write_tsc(0, 0);
-
-               /*
-                * Wait for all APs to leave the synchronization point:
-                */
-               while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1)
-                       cpu_relax();
-               atomic_set(&tsc.count_start, 0);
-               wmb();
-               atomic_inc(&tsc.count_stop);
-       }
-
-       sum = 0;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_isset(i, cpu_callout_map)) {
-                       t0 = tsc.values[i];
-                       sum += t0;
-               }
-       }
-       avg = sum;
-       do_div(avg, num_booting_cpus());
-
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_isset(i, cpu_callout_map))
-                       continue;
-               delta = tsc.values[i] - avg;
-               if (delta < 0)
-                       delta = -delta;
-               /*
-                * We report bigger than 2 microseconds clock differences.
-                */
-               if (delta > 2*one_usec) {
-                       long long realdelta;
-
-                       if (!buggy) {
-                               buggy = 1;
-                               printk("\n");
-                       }
-                       realdelta = delta;
-                       do_div(realdelta, one_usec);
-                       if (tsc.values[i] < avg)
-                               realdelta = -realdelta;
-
-                       if (realdelta)
-                               printk(KERN_INFO "CPU#%d had %Ld usecs TSC "
-                                       "skew, fixed it up.\n", i, realdelta);
-               }
-       }
-       if (!buggy)
-               printk("passed.\n");
-}
-
-static void __cpuinit synchronize_tsc_ap(void)
-{
-       int i;
-
-       /*
-        * Not every cpu is online at the time
-        * this gets called, so we first wait for the BP to
-        * finish SMP initialization:
-        */
-       while (!atomic_read(&tsc.start_flag))
-               cpu_relax();
-
-       for (i = 0; i < NR_LOOPS; i++) {
-               atomic_inc(&tsc.count_start);
-               while (atomic_read(&tsc.count_start) != num_booting_cpus())
-                       cpu_relax();
-
-               rdtscll(tsc.values[smp_processor_id()]);
-               if (i == NR_LOOPS-1)
-                       write_tsc(0, 0);
-
-               atomic_inc(&tsc.count_stop);
-               while (atomic_read(&tsc.count_stop) != num_booting_cpus())
-                       cpu_relax();
-       }
-}
-#undef NR_LOOPS
-
 extern void calibrate_delay(void);
 
 static atomic_t init_deasserted;
@@ -446,12 +295,6 @@ static void __cpuinit smp_callin(void)
         * Allow the master to continue.
         */
        cpu_set(cpuid, cpu_callin_map);
-
-       /*
-        * Synchronize the TSC with the BP
-        */
-       if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
-               synchronize_tsc_ap();
 }
 
 static int cpucount;
@@ -554,6 +397,11 @@ static void __cpuinit start_secondary(void *unused)
        smp_callin();
        while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                rep_nop();
+       /*
+        * Check TSC synchronization with the BP:
+        */
+       check_tsc_sync_target();
+
        setup_secondary_clock();
        if (nmi_watchdog == NMI_IO_APIC) {
                disable_8259A_irq(0);
@@ -1125,16 +973,13 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
        info.cpu = cpu;
        INIT_WORK(&info.task, do_warm_boot_cpu);
 
-       tsc_sync_disabled = 1;
-
        /* init low mem mapping */
        clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
                        min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
        flush_tlb_all();
        schedule_work(&info.task);
        wait_for_completion(&done);
 
-       tsc_sync_disabled = 0;
        zap_low_mappings();
        ret = 0;
 exit:
@@ -1331,12 +1176,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
        smpboot_setup_io_apic();
 
        setup_boot_clock();
-
-       /*
-        * Synchronize the TSC with the AP
-        */
-       if (cpu_has_tsc && cpucount && cpu_khz)
-               synchronize_tsc_bp();
 }
 
 /* These are wrappers to interface to the new boot process. Someone
@@ -1471,9 +1310,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
        }
 
        local_irq_enable();
+
        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
        /* Unleash the CPU! */
        cpu_set(cpu, smp_commenced_mask);
+
+       /*
+        * Check TSC synchronization with the AP:
+        */
+       check_tsc_sync_source(cpu);
+
        while (!cpu_isset(cpu, cpu_online_map))
                cpu_relax();
 
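
The check_tsc_sync_source()/check_tsc_sync_target() calls added above pair up
across the two CPUs: the boot processor waits in __cpu_up() for the freshly
booted AP, both run the warp-check loop concurrently, and the source CPU then
evaluates the verdict. The new shared file (arch/x86_64/kernel/tsc_sync.c) is
among the diffs not shown on this page, so the following is only a simplified
sketch of that rendezvous - the counter names and reset details are
assumptions, not the literal implementation:

    static atomic_t start_count, stop_count;

    /*
     * The source CPU (the BP) calls this from __cpu_up() after it has
     * unleashed the AP:
     */
    void check_tsc_sync_source(int cpu)
    {
            atomic_set(&stop_count, 0);     /* in case this is a re-bootup */

            /* wait for the target CPU to arrive, then start the measurement: */
            while (atomic_read(&start_count) != 1)
                    cpu_relax();
            atomic_inc(&start_count);

            check_tsc_warp();               /* the probing loop sketched earlier */

            /* wait for the target to finish its measurement too: */
            while (atomic_read(&stop_count) != 1)
                    cpu_relax();
            atomic_set(&start_count, 0);    /* reset for the next CPU bootup */

            if (nr_warps) {
                    printk(KERN_WARNING "TSC warp between CPUs, "
                           "turning off TSC clock-source.\n");
                    mark_tsc_unstable();
                    nr_warps = 0;
            }
            atomic_inc(&stop_count);        /* let the target continue booting */
    }

    /*
     * The freshly booted AP calls this from start_secondary():
     */
    void check_tsc_sync_target(void)
    {
            atomic_inc(&start_count);
            while (atomic_read(&start_count) != 2)  /* wait for the source */
                    cpu_relax();

            check_tsc_warp();

            atomic_inc(&stop_count);
            while (atomic_read(&stop_count) != 2)   /* wait for the verdict */
                    cpu_relax();
    }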
4 changes: 3 additions & 1 deletion arch/i386/kernel/tsc.c
@@ -406,8 +406,10 @@ static void verify_tsc_freq(unsigned long unused)
  * Make an educated guess if the TSC is trustworthy and synchronized
  * over all CPUs.
  */
-static __init int unsynchronized_tsc(void)
+__cpuinit int unsynchronized_tsc(void)
 {
+       if (!cpu_has_tsc || tsc_unstable)
+               return 1;
        /*
         * Intel systems are normally all synchronized.
         * Exceptions must mark TSC as unstable:
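
Note the visibility change: unsynchronized_tsc() loses its static qualifier
and trades __init for __cpuinit, so the new synchronization code can consult
it every time a CPU is brought up rather than only during boot. Presumably a
matching declaration is exported from a header, along the lines of (the exact
location is an assumption):

    extern int unsynchronized_tsc(void);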
1 change: 1 addition & 0 deletions arch/i386/kernel/tsc_sync.c
@@ -0,0 +1 @@
+#include "../../x86_64/kernel/tsc_sync.c"
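
This one-line wrapper is what unifies the two architectures: instead of
maintaining a second copy, the i386 build compiles the shared implementation
that lives under arch/x86_64/kernel/ (hence the tsc_sync.o additions in both
Makefiles).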
2 changes: 1 addition & 1 deletion arch/x86_64/kernel/Makefile
@@ -19,7 +19,7 @@ obj-$(CONFIG_ACPI)         += acpi/
 obj-$(CONFIG_X86_MSR)          += msr.o
 obj-$(CONFIG_MICROCODE)        += microcode.o
 obj-$(CONFIG_X86_CPUID)        += cpuid.o
-obj-$(CONFIG_SMP)              += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP)              += smp.o smpboot.o trampoline.o tsc_sync.o
 obj-y                          += apic.o nmi.o
 obj-y                          += io_apic.o mpparse.o \
                genapic.o genapic_cluster.o genapic_flat.o
