Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/mm changes from Peter Anvin:
 "The big change here is the patchset by Alex Shi to use INVLPG to flush
  only the affected pages when we only need to flush a small page range.

  It also removes the special INVALIDATE_TLB_VECTOR interrupts (32
  vectors!) and replaces them with an ordinary IPI function call."

Fix up trivial conflicts in arch/x86/include/asm/apic.h (added code next
to changed line)
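
For orientation before the file-by-file diff: the new code compares the
number of pages to be flushed against the size of the last-level TLB,
scaled by a per-CPU tlb_flushall_shift value; small ranges get one INVLPG
per page, everything else falls back to a full flush. The stand-alone C
sketch below models that heuristic; the constants, the default shift, and
the helper name are illustrative stand-ins, not the kernel's.

/* Illustrative model of the flush heuristic; not kernel code. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define TLB_FLUSH_ALL	(~0UL)

static int tlb_flushall_shift = 5;	/* assumed default; -1 disables */
static unsigned long tlb_entries = 512;	/* assumed last-level TLB size */

static void flush_decision(unsigned long start, unsigned long end)
{
	unsigned long pages = (end - start) >> PAGE_SHIFT;

	if (tlb_flushall_shift < 0 || end == TLB_FLUSH_ALL ||
	    pages > (tlb_entries >> tlb_flushall_shift)) {
		printf("full TLB flush (CR3 reload)\n");
		return;
	}
	/* small range: per-page invalidation beats refilling everything */
	for (unsigned long addr = start; addr < end; addr += 1UL << PAGE_SHIFT)
		printf("invlpg %#lx\n", addr);
}

int main(void)
{
	flush_decision(0x400000UL, 0x404000UL);	/* 4 pages: invlpg loop */
	flush_decision(0x400000UL, 0x800000UL);	/* 1024 pages: full flush */
	return 0;
}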

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tlb: Fix build warning and crash when building for !SMP
  x86/tlb: do flush_tlb_kernel_range by 'invlpg'
  x86/tlb: replace INVALIDATE_TLB_VECTOR by CALL_FUNCTION_VECTOR
  x86/tlb: enable tlb flush range support for x86
  mm/mmu_gather: enable tlb flush range in generic mmu_gather
  x86/tlb: add tlb_flushall_shift knob into debugfs
  x86/tlb: add tlb_flushall_shift for specific CPU
  x86/tlb: fall back to flush all when meet a THP large page
  x86/flush_tlb: try flush_tlb_single one by one in flush_tlb_range
  x86/tlb_info: get last level TLB entry number of CPU
  x86: Add read_mostly declaration/definition to variables from smp.h
  x86: Define early read-mostly per-cpu macros
Linus Torvalds committed Jul 26, 2012
2 parents 0a2fe19 + 7efa1c8 commit 4cb3875
Showing 26 changed files with 561 additions and 365 deletions.
19 changes: 19 additions & 0 deletions arch/x86/Kconfig.debug
@@ -129,6 +129,25 @@ config DOUBLEFAULT
 	  option saves about 4k and might cause you much additional grey
 	  hair.
 
+config DEBUG_TLBFLUSH
+	bool "Set upper limit of TLB entries to flush one-by-one"
+	depends on DEBUG_KERNEL && (X86_64 || X86_INVLPG)
+	---help---
+
+	X86-only for now.
+
+	This option allows the user to tune the amount of TLB entries the
+	kernel flushes one-by-one instead of doing a full TLB flush. In
+	certain situations, the former is cheaper. This is controlled by the
+	tlb_flushall_shift knob under /sys/kernel/debug/x86. If you set it
+	to -1, the code flushes the whole TLB unconditionally. Otherwise,
+	for positive values of it, the kernel will use single TLB entry
+	invalidating instructions according to the following formula:
+
+	flush_entries <= active_tlb_entries / 2^tlb_flushall_shift
+
+	If in doubt, say "N".
+
 config IOMMU_DEBUG
 	bool "Enable IOMMU debugging"
 	depends on GART_IOMMU && DEBUG_KERNEL
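
As a usage note, the knob described above can be driven from user space
once debugfs is mounted. A minimal sketch, assuming the path given in the
help text (/sys/kernel/debug/x86/tlb_flushall_shift) and a kernel built
with DEBUG_TLBFLUSH:

/* Illustrative: set the shift so up to active_tlb_entries/8 pages
 * are flushed one-by-one. Assumes debugfs at /sys/kernel/debug. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/x86/tlb_flushall_shift", "w");

	if (!f) {
		perror("tlb_flushall_shift");
		return 1;
	}
	fputs("3", f);	/* 2^3 = 8 */
	fclose(f);
	return 0;
}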
2 changes: 1 addition & 1 deletion arch/x86/include/asm/apic.h
@@ -546,7 +546,7 @@ static inline const struct cpumask *online_target_cpus(void)
 	return cpu_online_mask;
 }
 
-DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
 
 
 static inline unsigned int read_apic_id(void)
9 changes: 0 additions & 9 deletions arch/x86/include/asm/entry_arch.h
@@ -15,15 +15,6 @@ BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
-
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
-	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
-.if NUM_INVALIDATE_TLB_VECTORS > \idx
-BUILD_INTERRUPT3(invalidate_interrupt\idx,
-		 (INVALIDATE_TLB_VECTOR_START)+\idx,
-		 smp_invalidate_interrupt)
-.endif
-.endr
 #endif
 
 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
11 changes: 0 additions & 11 deletions arch/x86/include/asm/irq_vectors.h
@@ -119,17 +119,6 @@
  */
 #define LOCAL_TIMER_VECTOR		0xef
 
-/* up to 32 vectors used for spreading out TLB flushes: */
-#if NR_CPUS <= 32
-# define NUM_INVALIDATE_TLB_VECTORS	(NR_CPUS)
-#else
-# define NUM_INVALIDATE_TLB_VECTORS	(32)
-#endif
-
-#define INVALIDATE_TLB_VECTOR_END	(0xee)
-#define INVALIDATE_TLB_VECTOR_START	\
-	(INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
-
 #define NR_VECTORS			256
 
 #define FPU_IRQ				13
5 changes: 3 additions & 2 deletions arch/x86/include/asm/paravirt.h
@@ -360,9 +360,10 @@ static inline void __flush_tlb_single(unsigned long addr)
 
 static inline void flush_tlb_others(const struct cpumask *cpumask,
 				    struct mm_struct *mm,
-				    unsigned long va)
+				    unsigned long start,
+				    unsigned long end)
 {
-	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
+	PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
 }
 
 static inline int paravirt_pgd_alloc(struct mm_struct *mm)
3 changes: 2 additions & 1 deletion arch/x86/include/asm/paravirt_types.h
@@ -248,7 +248,8 @@ struct pv_mmu_ops {
 	void (*flush_tlb_single)(unsigned long addr);
 	void (*flush_tlb_others)(const struct cpumask *cpus,
 				 struct mm_struct *mm,
-				 unsigned long va);
+				 unsigned long start,
+				 unsigned long end);
 
 	/* Hooks for allocating and freeing a pagetable top-level */
 	int (*pgd_alloc)(struct mm_struct *mm);
17 changes: 17 additions & 0 deletions arch/x86/include/asm/percpu.h
@@ -551,6 +551,12 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
 	{ [0 ... NR_CPUS-1] = _initvalue };			\
 	__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
 
+#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue)	\
+	DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue;		\
+	__typeof__(_type) _name##_early_map[NR_CPUS] __initdata =	\
+		{ [0 ... NR_CPUS-1] = _initvalue };			\
+	__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
+
 #define EXPORT_EARLY_PER_CPU_SYMBOL(_name)			\
 	EXPORT_PER_CPU_SYMBOL(_name)
 
@@ -559,6 +565,11 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
 	extern __typeof__(_type) *_name##_early_ptr;		\
 	extern __typeof__(_type) _name##_early_map[]
 
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name)		\
+	DECLARE_PER_CPU_READ_MOSTLY(_type, _name);		\
+	extern __typeof__(_type) *_name##_early_ptr;		\
+	extern __typeof__(_type) _name##_early_map[]
+
 #define	early_per_cpu_ptr(_name) (_name##_early_ptr)
 #define	early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
 #define	early_per_cpu(_name, _cpu)				\
@@ -570,12 +581,18 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
 #define	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)		\
 	DEFINE_PER_CPU(_type, _name) = _initvalue
 
+#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue)	\
+	DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue
+
 #define EXPORT_EARLY_PER_CPU_SYMBOL(_name)			\
 	EXPORT_PER_CPU_SYMBOL(_name)
 
 #define	DECLARE_EARLY_PER_CPU(_type, _name)			\
 	DECLARE_PER_CPU(_type, _name)
 
+#define	DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name)		\
+	DECLARE_PER_CPU_READ_MOSTLY(_type, _name)
+
 #define	early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
 #define	early_per_cpu_ptr(_name) NULL
 /* no early_per_cpu_map() */
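
The new *_READ_MOSTLY variants follow the existing early per-cpu pattern:
a statically initialized array serves lookups before the per-cpu areas are
set up, after which the early pointer is retired and the map can be
discarded. A simplified stand-alone model of that indirection (all names
here are illustrative; the kernel's per-cpu offset arithmetic and
__initdata reclaim are elided):

/* Illustrative model of the early per-cpu indirection; not kernel code. */
#include <stdio.h>

#define NR_CPUS		4
#define BAD_APICID	0xffffU

/* "early map": statically initialized, usable before per-cpu setup */
static unsigned short apicid_early_map[NR_CPUS] =
	{ [0 ... NR_CPUS-1] = BAD_APICID };
static unsigned short *apicid_early_ptr = apicid_early_map;

/* stand-in for the real per-cpu storage, one slot per cpu */
static unsigned short apicid_percpu[NR_CPUS];

static unsigned short early_per_cpu(int cpu)
{
	/* while the early pointer is live, read the static map */
	return apicid_early_ptr ? apicid_early_ptr[cpu] : apicid_percpu[cpu];
}

static void setup_per_cpu_areas(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		apicid_percpu[cpu] = apicid_early_map[cpu];
	apicid_early_ptr = NULL;	/* early map may now be discarded */
}

int main(void)
{
	apicid_early_map[1] = 7;	/* firmware enumeration, pre-setup */
	printf("early: cpu1 apicid=%u\n", (unsigned)early_per_cpu(1));
	setup_per_cpu_areas();
	printf("late:  cpu1 apicid=%u\n", (unsigned)early_per_cpu(1));
	return 0;
}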
13 changes: 13 additions & 0 deletions arch/x86/include/asm/processor.h
@@ -61,6 +61,19 @@ static inline void *current_text_addr(void)
 # define ARCH_MIN_MMSTRUCT_ALIGN	0
 #endif
 
+enum tlb_infos {
+	ENTRIES,
+	NR_INFO
+};
+
+extern u16 __read_mostly tlb_lli_4k[NR_INFO];
+extern u16 __read_mostly tlb_lli_2m[NR_INFO];
+extern u16 __read_mostly tlb_lli_4m[NR_INFO];
+extern u16 __read_mostly tlb_lld_4k[NR_INFO];
+extern u16 __read_mostly tlb_lld_2m[NR_INFO];
+extern u16 __read_mostly tlb_lld_4m[NR_INFO];
+extern s8 __read_mostly tlb_flushall_shift;
+
 /*
  * CPU type and hardware bug flags. Kept separately for each CPU.
  * Members of this structure are referenced in head.S, so think twice
16 changes: 8 additions & 8 deletions arch/x86/include/asm/smp.h
@@ -31,12 +31,12 @@ static inline bool cpu_has_ht_siblings(void)
 	return has_siblings;
 }
 
-DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
-DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
 /* cpus sharing the last level cache: */
-DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
-DECLARE_PER_CPU(u16, cpu_llc_id);
-DECLARE_PER_CPU(int, cpu_number);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
+DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
 
 static inline struct cpumask *cpu_sibling_mask(int cpu)
 {
@@ -53,10 +53,10 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
 	return per_cpu(cpu_llc_shared_map, cpu);
 }
 
-DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
-DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
-DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
 #endif
 
 /* Static state in head.S used to set up a CPU */
9 changes: 8 additions & 1 deletion arch/x86/include/asm/tlb.h
@@ -4,7 +4,14 @@
 #define tlb_start_vma(tlb, vma) do { } while (0)
 #define tlb_end_vma(tlb, vma) do { } while (0)
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+#define tlb_flush(tlb)							\
+{									\
+	if (tlb->fullmm == 0)						\
+		flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL);	\
+	else								\
+		flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL);	\
+}
 
 #include <asm-generic/tlb.h>
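
The tlb->start/tlb->end fields used above come from the generic mmu_gather
range tracking enabled elsewhere in this series (see the mm/mmu_gather
entry in the shortlog). A simplified stand-alone model of the idea;
tlb_gather_init() and tlb_track() are hypothetical helpers, not kernel API:

/* Illustrative model of mmu_gather range tracking; not kernel code. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define TLB_FLUSH_ALL	(~0UL)

struct mmu_gather {
	unsigned long start, end;
	int fullmm;
};

static void tlb_gather_init(struct mmu_gather *tlb, int fullmm)
{
	tlb->fullmm = fullmm;
	tlb->start = TLB_FLUSH_ALL;	/* empty range */
	tlb->end = 0;
}

/* widen the pending range as each PTE is unmapped */
static void tlb_track(struct mmu_gather *tlb, unsigned long addr)
{
	if (addr < tlb->start)
		tlb->start = addr;
	if (addr + PAGE_SIZE > tlb->end)
		tlb->end = addr + PAGE_SIZE;
}

static void tlb_flush(struct mmu_gather *tlb)
{
	if (tlb->fullmm)
		printf("flush all\n");
	else
		printf("flush %#lx-%#lx\n", tlb->start, tlb->end);
}

int main(void)
{
	struct mmu_gather tlb;

	tlb_gather_init(&tlb, 0);
	tlb_track(&tlb, 0x400000UL);
	tlb_track(&tlb, 0x401000UL);
	tlb_flush(&tlb);	/* flush 0x400000-0x402000 */
	return 0;
}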
49 changes: 28 additions & 21 deletions arch/x86/include/asm/tlbflush.h
@@ -73,14 +73,10 @@ static inline void __flush_tlb_one(unsigned long addr)
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ *  - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
  *
  * ..but the i386 has somewhat limited tlb flushing capabilities,
  * and page-granular flushes are available only on i486 and up.
- *
- * x86-64 can only flush individual pages or full VMs. For a range flush
- * we always do the full VM. Might be worth trying if for a small
- * range a few INVLPGs in a row are a win.
  */
 
 #ifndef CONFIG_SMP
@@ -109,37 +105,53 @@
 	__flush_tlb();
 }
 
+static inline void flush_tlb_mm_range(struct mm_struct *mm,
+	   unsigned long start, unsigned long end, unsigned long vmflag)
+{
+	if (mm == current->active_mm)
+		__flush_tlb();
+}
+
 static inline void native_flush_tlb_others(const struct cpumask *cpumask,
 					   struct mm_struct *mm,
-					   unsigned long va)
+					   unsigned long start,
+					   unsigned long end)
 {
 }
 
 static inline void reset_lazy_tlbstate(void)
 {
 }
 
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	flush_tlb_all();
+}
+
 #else	/* SMP */
 
 #include <asm/smp.h>
 
 #define local_flush_tlb() __flush_tlb()
 
+#define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+
+#define flush_tlb_range(vma, start, end)	\
+		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+
 extern void flush_tlb_all(void);
 extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
 extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+				unsigned long end, unsigned long vmflag);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #define flush_tlb()	flush_tlb_current_task()
 
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	flush_tlb_mm(vma->vm_mm);
-}
-
 void native_flush_tlb_others(const struct cpumask *cpumask,
-			     struct mm_struct *mm, unsigned long va);
+			     struct mm_struct *mm,
+			     unsigned long start, unsigned long end);
 
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
@@ -159,13 +171,8 @@ static inline void reset_lazy_tlbstate(void)
 #endif	/* SMP */
 
 #ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(mask, mm, va)
+#define flush_tlb_others(mask, mm, start, end)	\
+	native_flush_tlb_others(mask, mm, start, end)
 #endif
 
-static inline void flush_tlb_kernel_range(unsigned long start,
-					  unsigned long end)
-{
-	flush_tlb_all();
-}
-
 #endif /* _ASM_X86_TLBFLUSH_H */
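
The upshot of this header change: flush_tlb_mm() and flush_tlb_range()
become thin wrappers, and everything funnels into flush_tlb_mm_range(),
with end == TLB_FLUSH_ALL denoting a full flush. A stand-alone sketch of
that calling convention, with stub types and a print in place of the real
flush (not the kernel implementation):

/* Illustrative model of the new call convention; not kernel code. */
#include <stdio.h>

#define TLB_FLUSH_ALL	(~0UL)

struct mm_struct { int dummy; };

/* stand-in for the kernel's flush_tlb_mm_range() */
static void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			       unsigned long end, unsigned long vmflag)
{
	if (end == TLB_FLUSH_ALL)
		printf("full flush of mm %p\n", (void *)mm);
	else
		printf("range flush %#lx-%#lx (vmflag %#lx)\n",
		       start, end, vmflag);
}

int main(void)
{
	struct mm_struct mm;

	/* old flush_tlb_mm(mm) is now spelled: */
	flush_tlb_mm_range(&mm, 0UL, TLB_FLUSH_ALL, 0UL);
	/* old flush_tlb_range(vma, start, end) forwards the VMA's flags: */
	flush_tlb_mm_range(&mm, 0x400000UL, 0x402000UL, 0UL);
	return 0;
}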
5 changes: 3 additions & 2 deletions arch/x86/include/asm/uv/uv.h
@@ -15,7 +15,8 @@ extern void uv_nmi_init(void);
 extern void uv_system_init(void);
 extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
						 struct mm_struct *mm,
-						 unsigned long va,
+						 unsigned long start,
+						 unsigned end,
						 unsigned int cpu);
 
 #else	/* X86_UV */
@@ -26,7 +27,7 @@ static inline void uv_cpu_init(void)	{ }
 static inline void uv_system_init(void)	{ }
 static inline const struct cpumask *
 uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
-		    unsigned long va, unsigned int cpu)
+		    unsigned long start, unsigned long end, unsigned int cpu)
 { return cpumask; }
 
 #endif	/* X86_UV */
6 changes: 3 additions & 3 deletions arch/x86/kernel/apic/apic.c
@@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map;
 /*
  * Map cpu index to physical APIC ID
  */
-DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
@@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
  * used for the mapping. This is where the behaviors of x86_64 and 32
  * actually diverge. Let's keep it ugly for now.
  */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 
 /*
  * Knob to control our willingness to enable the local APIC.
31 changes: 31 additions & 0 deletions arch/x86/kernel/cpu/common.c
@@ -452,6 +452,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 	c->x86_cache_size = l2size;
 }
 
+u16 __read_mostly tlb_lli_4k[NR_INFO];
+u16 __read_mostly tlb_lli_2m[NR_INFO];
+u16 __read_mostly tlb_lli_4m[NR_INFO];
+u16 __read_mostly tlb_lld_4k[NR_INFO];
+u16 __read_mostly tlb_lld_2m[NR_INFO];
+u16 __read_mostly tlb_lld_4m[NR_INFO];
+
+/*
+ * tlb_flushall_shift shows the balance point in replacing cr3 write
+ * with multiple 'invlpg'. It will do this replacement when
+ *   flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
+ * If tlb_flushall_shift is -1, means the replacement will be disabled.
+ */
+s8 __read_mostly tlb_flushall_shift = -1;
+
+void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
+{
+	if (this_cpu->c_detect_tlb)
+		this_cpu->c_detect_tlb(c);
+
+	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
+		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"	     \
+		"tlb_flushall_shift is 0x%x\n",
+		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
+		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
+		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
+		tlb_flushall_shift);
+}
+
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_HT
@@ -911,6 +940,8 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
+	if (boot_cpu_data.cpuid_level >= 2)
+		cpu_detect_tlb(&boot_cpu_data);
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
[diff truncated: the remaining 12 changed files are not shown]
