Skip to content

Commit

Permalink
mm/page_alloc: fix race condition in unaccepted memory handling
Browse files Browse the repository at this point in the history
The page allocator tracks the number of zones that have unaccepted memory
using static_branch_inc/dec() and uses that static branch in hot paths to
determine if it needs to deal with unaccepted memory.

Borislav and Thomas pointed out that the tracking is racy: operations on
static_branch are not serialized against adding/removing unaccepted pages
to/from the zone.

Sanity checks inside the static_branch machinery detect it:

WARNING: CPU: 0 PID: 10 at kernel/jump_label.c:276 __static_key_slow_dec_cpuslocked+0x8e/0xa0

The comment around the WARN() explains the problem:

	/*
	 * Warn about the '-1' case though; since that means a
	 * decrement is concurrent with a first (0->1) increment. IOW
	 * people are trying to disable something that wasn't yet fully
	 * enabled. This suggests an ordering problem on the user side.
	 */

The effect of this static_branch optimization is only visible in
microbenchmarks.

Instead of adding more complexity around it, remove it altogether.

Link: https://lkml.kernel.org/r/20250506133207.1009676-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Fixes: dcdfdd4 ("mm: Add support for unaccepted memory")
Link: https://lore.kernel.org/all/20250506092445.GBaBnVXXyvnazly6iF@fat_crate.local
Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov (AMD) <bp@alien8.de>
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org>	[6.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
  • Loading branch information
Kirill A. Shutemov authored and Andrew Morton committed May 12, 2025
1 parent 23fa022 commit fefc075
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 49 deletions.
1 change: 0 additions & 1 deletion mm/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1590,7 +1590,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);

#ifdef CONFIG_UNACCEPTED_MEMORY
void accept_page(struct page *page);
void unaccepted_cleanup_work(struct work_struct *work);
#else /* CONFIG_UNACCEPTED_MEMORY */
static inline void accept_page(struct page *page)
{
Expand Down
1 change: 0 additions & 1 deletion mm/mm_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)

#ifdef CONFIG_UNACCEPTED_MEMORY
INIT_LIST_HEAD(&zone->unaccepted_pages);
INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
#endif
}

Expand Down
47 changes: 0 additions & 47 deletions mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -7172,16 +7172,8 @@ bool has_managed_dma(void)

#ifdef CONFIG_UNACCEPTED_MEMORY

/* Counts number of zones with unaccepted pages. */
static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);

static bool lazy_accept = true;

/*
 * Deferred decrement of the zones_with_unaccepted_pages static branch.
 * Scheduled (rather than called inline) because flipping a static branch
 * takes cpu_hotplug_lock, which can deadlock if the allocation happens
 * during CPU bring-up -- see the comment in __accept_page().
 */
void unaccepted_cleanup_work(struct work_struct *work)
{
static_branch_dec(&zones_with_unaccepted_pages);
}

static int __init accept_memory_parse(char *p)
{
if (!strcmp(p, "lazy")) {
Expand All @@ -7206,11 +7198,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
static void __accept_page(struct zone *zone, unsigned long *flags,
struct page *page)
{
bool last;

list_del(&page->lru);
last = list_empty(&zone->unaccepted_pages);

account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
__ClearPageUnaccepted(page);
Expand All @@ -7219,28 +7207,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags,
accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);

__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);

if (last) {
/*
* There are two corner cases:
*
* - If allocation occurs during the CPU bring up,
* static_branch_dec() cannot be used directly as
* it causes a deadlock on cpu_hotplug_lock.
*
* Instead, use schedule_work() to prevent deadlock.
*
* - If allocation occurs before workqueues are initialized,
* static_branch_dec() should be called directly.
*
* Workqueues are initialized before CPU bring up, so this
* will not conflict with the first scenario.
*/
if (system_wq)
schedule_work(&zone->unaccepted_cleanup);
else
unaccepted_cleanup_work(&zone->unaccepted_cleanup);
}
}

void accept_page(struct page *page)
Expand Down Expand Up @@ -7277,20 +7243,12 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true;
}

/*
 * Hot-path fast check: true iff at least one zone currently holds
 * unaccepted pages, as tracked by the zones_with_unaccepted_pages
 * static branch (incremented in __free_unaccepted(), decremented via
 * unaccepted_cleanup_work()).
 *
 * NOTE(review): this tracking is racy against concurrent list
 * add/remove -- which is exactly the defect this commit removes.
 */
static inline bool has_unaccepted_memory(void)
{
return static_branch_unlikely(&zones_with_unaccepted_pages);
}

static bool cond_accept_memory(struct zone *zone, unsigned int order,
int alloc_flags)
{
long to_accept, wmark;
bool ret = false;

if (!has_unaccepted_memory())
return false;

if (list_empty(&zone->unaccepted_pages))
return false;

Expand Down Expand Up @@ -7328,22 +7286,17 @@ static bool __free_unaccepted(struct page *page)
{
struct zone *zone = page_zone(page);
unsigned long flags;
bool first = false;

if (!lazy_accept)
return false;

spin_lock_irqsave(&zone->lock, flags);
first = list_empty(&zone->unaccepted_pages);
list_add_tail(&page->lru, &zone->unaccepted_pages);
account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
__SetPageUnaccepted(page);
spin_unlock_irqrestore(&zone->lock, flags);

if (first)
static_branch_inc(&zones_with_unaccepted_pages);

return true;
}

Expand Down

0 comments on commit fefc075

Please sign in to comment.