Skip to content

Commit

Permalink
Merge branch 'x86-amd-nb-for-linus' of git://git.kernel.org/pub/scm/l…
Browse files Browse the repository at this point in the history
…inux/kernel/git/tip/linux-2.6-tip

* 'x86-amd-nb-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, amd_nb: Enable GART support for AMD family 0x15 CPUs
  x86, amd: Use compute unit information to determine thread siblings
  x86, amd: Extract compute unit information for AMD CPUs
  x86, amd: Add support for CPUID topology extension of AMD CPUs
  x86, nmi: Support NMI watchdog on newer AMD CPU families
  x86, mtrr: Assume SYS_CFG[Tom2ForceMemTypeWB] exists on all future AMD CPUs
  x86, k8: Rename k8.[ch] to amd_nb.[ch] and CONFIG_K8_NB to CONFIG_AMD_NB
  x86, k8-gart: Decouple handling of garts and northbridges
  x86, cacheinfo: Fix dependency of AMD L3 CID
  x86, kvm: add new AMD SVM feature bits
  x86, cpu: Fix allowed CPUID bits for KVM guests
  x86, cpu: Update AMD CPUID feature bits
  x86, cpu: Fix renamed, not-yet-shipping AMD CPUID feature bit
  x86, AMD: Remove needless CPU family check (for L3 cache info)
  x86, tsc: Remove CPU frequency calibration on AMD
  • Loading branch information
Linus Torvalds committed Oct 21, 2010
2 parents bc4016f + 5c80cc7 commit 2f0384e
Show file tree
Hide file tree
Showing 24 changed files with 208 additions and 170 deletions.
4 changes: 2 additions & 2 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ config GART_IOMMU
bool "GART IOMMU support" if EMBEDDED
default y
select SWIOTLB
depends on X86_64 && PCI && K8_NB
depends on X86_64 && PCI && AMD_NB
---help---
Support for full DMA access of devices with 32bit memory access only
on systems with more than 3GB. This is usually needed for USB,
Expand Down Expand Up @@ -2091,7 +2091,7 @@ config OLPC_OPENFIRMWARE

endif # X86_32

config K8_NB
config AMD_NB
def_bool y
depends on CPU_SUP_AMD && PCI

Expand Down
21 changes: 12 additions & 9 deletions arch/x86/include/asm/k8.h → arch/x86/include/asm/amd_nb.h
Original file line number Diff line number Diff line change
@@ -1,30 +1,33 @@
#ifndef _ASM_X86_K8_H
#define _ASM_X86_K8_H
#ifndef _ASM_X86_AMD_NB_H
#define _ASM_X86_AMD_NB_H

#include <linux/pci.h>

extern struct pci_device_id k8_nb_ids[];
struct bootnode;

extern int early_is_k8_nb(u32 value);
extern struct pci_dev **k8_northbridges;
extern int num_k8_northbridges;
extern int cache_k8_northbridges(void);
extern void k8_flush_garts(void);
extern int k8_get_nodes(struct bootnode *nodes);
extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int k8_scan_nodes(void);

#ifdef CONFIG_K8_NB
extern int num_k8_northbridges;
struct k8_northbridge_info {
u16 num;
u8 gart_supported;
struct pci_dev **nb_misc;
};
extern struct k8_northbridge_info k8_northbridges;

#ifdef CONFIG_AMD_NB

static inline struct pci_dev *node_to_k8_nb_misc(int node)
{
return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
}

#else
#define num_k8_northbridges 0

static inline struct pci_dev *node_to_k8_nb_misc(int node)
{
Expand All @@ -33,4 +36,4 @@ static inline struct pci_dev *node_to_k8_nb_misc(int node)
#endif


#endif /* _ASM_X86_K8_H */
#endif /* _ASM_X86_AMD_NB_H */
13 changes: 12 additions & 1 deletion arch/x86/include/asm/cpufeature.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,14 @@
#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */
#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */

/*
* Auxiliary flags: Linux defined - For features scattered in various
Expand All @@ -180,6 +184,13 @@
#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */
#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */
#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */
#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */


/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ struct cpuinfo_x86 {
u16 phys_proc_id;
/* Core id: */
u16 cpu_core_id;
/* Compute unit id */
u8 compute_unit_id;
/* Index into per_cpu list: */
u16 cpu_index;
#endif
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_APB_TIMER) += apb_timer.o

obj-$(CONFIG_K8_NB) += k8.o
obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o

Expand Down
56 changes: 33 additions & 23 deletions arch/x86/kernel/k8.c → arch/x86/kernel/amd_nb.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,19 @@
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <asm/k8.h>

int num_k8_northbridges;
EXPORT_SYMBOL(num_k8_northbridges);
#include <asm/amd_nb.h>

static u32 *flush_words;

struct pci_device_id k8_nb_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
{}
};
EXPORT_SYMBOL(k8_nb_ids);

struct pci_dev **k8_northbridges;
struct k8_northbridge_info k8_northbridges;
EXPORT_SYMBOL(k8_northbridges);

static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
Expand All @@ -40,36 +38,45 @@ int cache_k8_northbridges(void)
int i;
struct pci_dev *dev;

if (num_k8_northbridges)
if (k8_northbridges.num)
return 0;

dev = NULL;
while ((dev = next_k8_northbridge(dev)) != NULL)
num_k8_northbridges++;
k8_northbridges.num++;

/* some CPU families (e.g. family 0x11) do not support GART */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
boot_cpu_data.x86 == 0x15)
k8_northbridges.gart_supported = 1;

k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
GFP_KERNEL);
if (!k8_northbridges)
k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
sizeof(void *), GFP_KERNEL);
if (!k8_northbridges.nb_misc)
return -ENOMEM;

if (!num_k8_northbridges) {
k8_northbridges[0] = NULL;
if (!k8_northbridges.num) {
k8_northbridges.nb_misc[0] = NULL;
return 0;
}

flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
if (!flush_words) {
kfree(k8_northbridges);
return -ENOMEM;
if (k8_northbridges.gart_supported) {
flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
GFP_KERNEL);
if (!flush_words) {
kfree(k8_northbridges.nb_misc);
return -ENOMEM;
}
}

dev = NULL;
i = 0;
while ((dev = next_k8_northbridge(dev)) != NULL) {
k8_northbridges[i] = dev;
pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
k8_northbridges.nb_misc[i] = dev;
if (k8_northbridges.gart_supported)
pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
}
k8_northbridges[i] = NULL;
k8_northbridges.nb_misc[i] = NULL;
return 0;
}
EXPORT_SYMBOL_GPL(cache_k8_northbridges);
Expand All @@ -93,22 +100,25 @@ void k8_flush_garts(void)
unsigned long flags;
static DEFINE_SPINLOCK(gart_lock);

if (!k8_northbridges.gart_supported)
return;

/* Avoid races between AGP and IOMMU. In theory it's not needed
but I'm not sure if the hardware won't lose flush requests
when another is pending. This whole thing is so expensive anyways
that it doesn't matter to serialize more. -AK */
spin_lock_irqsave(&gart_lock, flags);
flushed = 0;
for (i = 0; i < num_k8_northbridges; i++) {
pci_write_config_dword(k8_northbridges[i], 0x9c,
for (i = 0; i < k8_northbridges.num; i++) {
pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
flush_words[i]|1);
flushed++;
}
for (i = 0; i < num_k8_northbridges; i++) {
for (i = 0; i < k8_northbridges.num; i++) {
u32 w;
/* Make sure the hardware actually executed the flush*/
for (;;) {
pci_read_config_dword(k8_northbridges[i],
pci_read_config_dword(k8_northbridges.nb_misc[i],
0x9c, &w);
if (!(w & 1))
break;
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/aperture_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include <asm/gart.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
#include <asm/k8.h>
#include <asm/amd_nb.h>
#include <asm/x86_init.h>

int gart_iommu_aperture;
Expand Down
75 changes: 52 additions & 23 deletions arch/x86/kernel/cpu/amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,37 +253,51 @@ static int __cpuinit nearby_node(int apicid)
#endif

/*
* Fixup core topology information for AMD multi-node processors.
* Assumption: Number of cores in each internal node is the same.
* Fixup core topology information for
* (1) AMD multi-node processors
* Assumption: Number of cores in each internal node is the same.
* (2) AMD processors supporting compute units
*/
#ifdef CONFIG_X86_HT
static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
{
unsigned long long value;
u32 nodes, cores_per_node;
u32 nodes;
u8 node_id;
int cpu = smp_processor_id();

if (!cpu_has(c, X86_FEATURE_NODEID_MSR))
return;
/* get information required for multi-node processors */
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
u32 eax, ebx, ecx, edx;

/* fixup topology information only once for a core */
if (cpu_has(c, X86_FEATURE_AMD_DCM))
return;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
nodes = ((ecx >> 8) & 7) + 1;
node_id = ecx & 7;

rdmsrl(MSR_FAM10H_NODE_ID, value);
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
c->compute_unit_id = ebx & 0xff;
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;

nodes = ((value >> 3) & 7) + 1;
if (nodes == 1)
rdmsrl(MSR_FAM10H_NODE_ID, value);
nodes = ((value >> 3) & 7) + 1;
node_id = value & 7;
} else
return;

set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;
/* fixup multi-node processor information */
if (nodes > 1) {
u32 cores_per_node;

set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;

/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = value & 7;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;

/* fixup core id to be in range from 0 to (cores_per_node - 1) */
c->cpu_core_id = c->cpu_core_id % cores_per_node;
/* core id to be in range from 0 to (cores_per_node - 1) */
c->cpu_core_id = c->cpu_core_id % cores_per_node;
}
}
#endif

Expand All @@ -304,9 +318,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
/* fixup topology information on multi-node processors */
if ((c->x86 == 0x10) && (c->x86_model == 9))
amd_fixup_dcm(c);
amd_get_topology(c);
#endif
}

Expand Down Expand Up @@ -412,6 +424,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
}
#endif

/* We need to do the following only once */
if (c != &boot_cpu_data)
return;

if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {

if (c->x86 > 0x10 ||
(c->x86 == 0x10 && c->x86_model >= 0x2)) {
u64 val;

rdmsrl(MSR_K7_HWCR, val);
if (!(val & BIT(24)))
printk(KERN_WARNING FW_BUG "TSC doesn't count "
"with P0 frequency!\n");
}
}
}

static void __cpuinit init_amd(struct cpuinfo_x86 *c)
Expand Down Expand Up @@ -523,7 +552,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
#endif

if (c->extended_cpuid_level >= 0x80000006) {
if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000))
if (cpuid_edx(0x80000006) & 0xf000)
num_cache_leaves = 4;
else
num_cache_leaves = 3;
Expand Down
Loading

0 comments on commit 2f0384e

Please sign in to comment.