Skip to content

Commit

Permalink
Merge branch 'akpm' (incoming from Andrew)
Browse files Browse the repository at this point in the history
Merge patches from Andrew Morton:
 "23 fixes and a MAINTAINERS update"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (24 commits)
  mm/hugetlb: check for pte NULL pointer in __page_check_address()
  fix build with make 3.80
  mm/mempolicy: fix !vma in new_vma_page()
  MAINTAINERS: add Davidlohr as GPT maintainer
  mm/memory-failure.c: recheck PageHuge() after hugetlb page migrate successfully
  mm/compaction: respect ignore_skip_hint in update_pageblock_skip
  mm/mempolicy: correct putback method for isolate pages if failed
  mm: add missing dependency in Kconfig
  sh: always link in helper functions extracted from libgcc
  mm: page_alloc: exclude unreclaimable allocations from zone fairness policy
  mm: numa: defer TLB flush for THP migration as long as possible
  mm: numa: guarantee that tlb_flush_pending updates are visible before page table updates
  mm: fix TLB flush race between migration, and change_protection_range
  mm: numa: avoid unnecessary disruption of NUMA hinting during migration
  mm: numa: clear numa hinting information on mprotect
  sched: numa: skip inaccessible VMAs
  mm: numa: avoid unnecessary work on the failure path
  mm: numa: ensure anon_vma is locked to prevent parallel THP splits
  mm: numa: do not clear PTE for pte_numa update
  mm: numa: do not clear PMD during PTE update scan
  ...
  • Loading branch information
Linus Torvalds committed Dec 19, 2013
2 parents a36c160 + 98398c3 commit 86fbf16
Show file tree
Hide file tree
Showing 24 changed files with 249 additions and 57 deletions.
6 changes: 6 additions & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -3833,6 +3833,12 @@ T: git git://linuxtv.org/media_tree.git
S: Maintained
F: drivers/media/usb/gspca/

GUID PARTITION TABLE (GPT)
M: Davidlohr Bueso <davidlohr@hp.com>
L: linux-efi@vger.kernel.org
S: Maintained
F: block/partitions/efi.*

STK1160 USB VIDEO CAPTURE DRIVER
M: Ezequiel Garcia <elezegarcia@gmail.com>
L: linux-media@vger.kernel.org
Expand Down
20 changes: 7 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -732,19 +732,13 @@ export mod_strip_cmd
# Select initial ramdisk compression format, default is gzip(1).
# This shall be used by the dracut(8) tool while creating an initramfs image.
#
INITRD_COMPRESS=gzip
ifeq ($(CONFIG_RD_BZIP2), y)
INITRD_COMPRESS=bzip2
else ifeq ($(CONFIG_RD_LZMA), y)
INITRD_COMPRESS=lzma
else ifeq ($(CONFIG_RD_XZ), y)
INITRD_COMPRESS=xz
else ifeq ($(CONFIG_RD_LZO), y)
INITRD_COMPRESS=lzo
else ifeq ($(CONFIG_RD_LZ4), y)
INITRD_COMPRESS=lz4
endif
export INITRD_COMPRESS
INITRD_COMPRESS-y := gzip
INITRD_COMPRESS-$(CONFIG_RD_BZIP2) := bzip2
INITRD_COMPRESS-$(CONFIG_RD_LZMA) := lzma
INITRD_COMPRESS-$(CONFIG_RD_XZ) := xz
INITRD_COMPRESS-$(CONFIG_RD_LZO) := lzo
INITRD_COMPRESS-$(CONFIG_RD_LZ4) := lz4
export INITRD_COMPRESS := $(INITRD_COMPRESS-y)

ifdef CONFIG_MODULE_SIG_ALL
MODSECKEY = ./signing_key.priv
Expand Down
2 changes: 1 addition & 1 deletion arch/sh/lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ lib-y = delay.o memmove.o memchr.o \
checksum.o strlen.o div64.o div64-generic.o

# Extracted from libgcc
lib-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \
obj-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \
ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \
udiv_qrnnd.o

Expand Down
4 changes: 2 additions & 2 deletions arch/sparc/include/asm/pgtable_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ static inline unsigned long pte_present(pte_t pte)
}

#define pte_accessible pte_accessible
static inline unsigned long pte_accessible(pte_t a)
static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
{
return pte_val(a) & _PAGE_VALID;
}
Expand Down Expand Up @@ -847,7 +847,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
* SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
* and SUN4V pte layout, so this inline test is fine.
*/
if (likely(mm != &init_mm) && pte_accessible(orig))
if (likely(mm != &init_mm) && pte_accessible(mm, orig))
tlb_batch_add(mm, addr, ptep, orig, fullmm);
}

Expand Down
11 changes: 9 additions & 2 deletions arch/x86/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,9 +452,16 @@ static inline int pte_present(pte_t a)
}

#define pte_accessible pte_accessible
static inline int pte_accessible(pte_t a)
static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
{
return pte_flags(a) & _PAGE_PRESENT;
if (pte_flags(a) & _PAGE_PRESENT)
return true;

if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
mm_tlb_flush_pending(mm))
return true;

return false;
}

static inline int pte_hidden(pte_t pte)
Expand Down
13 changes: 13 additions & 0 deletions arch/x86/mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
pte_t pte = gup_get_pte(ptep);
struct page *page;

/* Similar to the PMD case, NUMA hinting must take slow path */
if (pte_numa(pte)) {
pte_unmap(ptep);
return 0;
}

if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
pte_unmap(ptep);
return 0;
Expand Down Expand Up @@ -167,6 +173,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
if (unlikely(pmd_large(pmd))) {
/*
* NUMA hinting faults need to be handled in the GUP
* slowpath for accounting purposes and so that they
* can be serialised against THP migration.
*/
if (pmd_numa(pmd))
return 0;
if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
return 0;
} else {
Expand Down
2 changes: 1 addition & 1 deletion include/asm-generic/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
#endif

#ifndef pte_accessible
# define pte_accessible(pte) ((void)(pte),1)
# define pte_accessible(mm, pte) ((void)(pte), 1)
#endif

#ifndef flush_tlb_fix_spurious_fault
Expand Down
9 changes: 9 additions & 0 deletions include/linux/migrate.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,19 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
#endif /* CONFIG_MIGRATION */

#ifdef CONFIG_NUMA_BALANCING
extern bool pmd_trans_migrating(pmd_t pmd);
extern void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd);
extern int migrate_misplaced_page(struct page *page,
struct vm_area_struct *vma, int node);
extern bool migrate_ratelimited(int node);
#else
static inline bool pmd_trans_migrating(pmd_t pmd)
{
return false;
}
static inline void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
{
}
static inline int migrate_misplaced_page(struct page *page,
struct vm_area_struct *vma, int node)
{
Expand Down
49 changes: 49 additions & 0 deletions include/linux/mm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,14 @@ struct mm_struct {

/* numa_scan_seq prevents two threads setting pte_numa */
int numa_scan_seq;
#endif
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
/*
* An operation with batched TLB flushing is going on. Anything that
* can move process memory needs to flush the TLB when moving a
* PROT_NONE or PROT_NUMA mapped page.
*/
bool tlb_flush_pending;
#endif
struct uprobes_state uprobes_state;
};
Expand All @@ -459,4 +467,45 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return mm->cpu_vm_mask_var;
}

#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
/*
* Memory barriers to keep this state in sync are graciously provided by
* the page table locks, outside of which no page table modifications happen.
* The barriers below prevent the compiler from re-ordering the instructions
* around the memory barriers that are already present in the code.
*/
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
barrier();
return mm->tlb_flush_pending;
}
static inline void set_tlb_flush_pending(struct mm_struct *mm)
{
mm->tlb_flush_pending = true;

/*
* Guarantee that the tlb_flush_pending store does not leak into the
* critical section updating the page tables
*/
smp_mb__before_spinlock();
}
/* Clearing is done after a TLB flush, which also provides a barrier. */
static inline void clear_tlb_flush_pending(struct mm_struct *mm)
{
barrier();
mm->tlb_flush_pending = false;
}
#else
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
return false;
}
static inline void set_tlb_flush_pending(struct mm_struct *mm)
{
}
static inline void clear_tlb_flush_pending(struct mm_struct *mm)
{
}
#endif

#endif /* _LINUX_MM_TYPES_H */
1 change: 1 addition & 0 deletions include/linux/reboot.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ extern int unregister_reboot_notifier(struct notifier_block *);
* Architecture-specific implementations of sys_reboot commands.
*/

extern void migrate_to_reboot_cpu(void);
extern void machine_restart(char *cmd);
extern void machine_halt(void);
extern void machine_power_off(void);
Expand Down
1 change: 1 addition & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
spin_lock_init(&mm->page_table_lock);
mm_init_aio(mm);
mm_init_owner(mm, p);
clear_tlb_flush_pending(mm);

if (likely(!mm_alloc_pgd(mm))) {
mm->def_flags = 0;
Expand Down
1 change: 1 addition & 0 deletions kernel/kexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1680,6 +1680,7 @@ int kernel_kexec(void)
{
kexec_in_progress = true;
kernel_restart_prepare(NULL);
migrate_to_reboot_cpu();
printk(KERN_EMERG "Starting new kernel\n");
machine_shutdown();
}
Expand Down
2 changes: 1 addition & 1 deletion kernel/reboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ int unregister_reboot_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_reboot_notifier);

static void migrate_to_reboot_cpu(void)
void migrate_to_reboot_cpu(void)
{
/* The boot cpu is always logical cpu 0 */
int cpu = reboot_cpu;
Expand Down
7 changes: 7 additions & 0 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -1738,6 +1738,13 @@ void task_numa_work(struct callback_head *work)
(vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ)))
continue;

/*
* Skip inaccessible VMAs to avoid any confusion between
* PROT_NONE and NUMA hinting ptes
*/
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
continue;

do {
start = max(start, vma->vm_start);
end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion mm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ config ZSWAP

config MEM_SOFT_DIRTY
bool "Track memory changes"
depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY
depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
select PROC_PAGE_MONITOR
help
This option enables memory changes tracking by introducing a
Expand Down
4 changes: 4 additions & 0 deletions mm/compaction.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ static void update_pageblock_skip(struct compact_control *cc,
bool migrate_scanner)
{
struct zone *zone = cc->zone;

if (cc->ignore_skip_hint)
return;

if (!page)
return;

Expand Down
45 changes: 36 additions & 9 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,10 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
ret = 0;
goto out_unlock;
}

/* mmap_sem prevents this happening but warn if that changes */
WARN_ON(pmd_trans_migrating(pmd));

if (unlikely(pmd_trans_splitting(pmd))) {
/* split huge page running from under us */
spin_unlock(src_ptl);
Expand Down Expand Up @@ -1243,6 +1247,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
return ERR_PTR(-EFAULT);

/* Full NUMA hinting faults to serialise migration in fault paths */
if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
goto out;

page = pmd_page(*pmd);
VM_BUG_ON(!PageHead(page));
if (flags & FOLL_TOUCH) {
Expand Down Expand Up @@ -1295,6 +1303,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock;

/*
* If there are potential migrations, wait for completion and retry
* without disrupting NUMA hinting information. Do not relock and
* check_same as the page may no longer be mapped.
*/
if (unlikely(pmd_trans_migrating(*pmdp))) {
spin_unlock(ptl);
wait_migrate_huge_page(vma->anon_vma, pmdp);
goto out;
}

page = pmd_page(pmd);
BUG_ON(is_huge_zero_page(page));
page_nid = page_to_nid(page);
Expand Down Expand Up @@ -1323,23 +1342,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* If the page was locked, there are no parallel migrations */
if (page_locked)
goto clear_pmdnuma;
}

/*
* Otherwise wait for potential migrations and retry. We do
* relock and check_same as the page may no longer be mapped.
* As the fault is being retried, do not account for it.
*/
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
spin_unlock(ptl);
wait_on_page_locked(page);
page_nid = -1;
goto out;
}

/* Page is misplaced, serialise migrations and parallel THP splits */
/*
* Page is misplaced. Page lock serialises migrations. Acquire anon_vma
* to serialises splits
*/
get_page(page);
spin_unlock(ptl);
if (!page_locked)
lock_page(page);
anon_vma = page_lock_anon_vma_read(page);

/* Confirm the PMD did not change while page_table_lock was released */
Expand All @@ -1351,6 +1369,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_unlock;
}

/* Bail if we fail to protect against THP splits for any reason */
if (unlikely(!anon_vma)) {
put_page(page);
page_nid = -1;
goto clear_pmdnuma;
}

/*
* Migrate the THP to the requested node, returns with page unlocked
* and pmd_numa cleared.
Expand Down Expand Up @@ -1517,6 +1542,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
ret = 1;
if (!prot_numa) {
entry = pmdp_get_and_clear(mm, addr, pmd);
if (pmd_numa(entry))
entry = pmd_mknonnuma(entry);
entry = pmd_modify(entry, newprot);
ret = HPAGE_PMD_NR;
BUG_ON(pmd_write(entry));
Expand All @@ -1531,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
*/
if (!is_huge_zero_page(page) &&
!pmd_numa(*pmd)) {
entry = pmdp_get_and_clear(mm, addr, pmd);
entry = *pmd;
entry = pmd_mknuma(entry);
ret = HPAGE_PMD_NR;
}
Expand Down
14 changes: 10 additions & 4 deletions mm/memory-failure.c
Original file line number Diff line number Diff line change
Expand Up @@ -1505,10 +1505,16 @@ static int soft_offline_huge_page(struct page *page, int flags)
if (ret > 0)
ret = -EIO;
} else {
set_page_hwpoison_huge_page(hpage);
dequeue_hwpoisoned_huge_page(hpage);
atomic_long_add(1 << compound_order(hpage),
&num_poisoned_pages);
/* overcommit hugetlb page will be freed to buddy */
if (PageHuge(page)) {
set_page_hwpoison_huge_page(hpage);
dequeue_hwpoisoned_huge_page(hpage);
atomic_long_add(1 << compound_order(hpage),
&num_poisoned_pages);
} else {
SetPageHWPoison(page);
atomic_long_inc(&num_poisoned_pages);
}
}
return ret;
}
Expand Down
Loading

0 comments on commit 86fbf16

Please sign in to comment.