diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 585c66fce9d95..f1e79263fe61e 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -950,6 +950,7 @@ static int cacheinfo_cpu_online(unsigned int cpu)
 	if (rc)
 		goto err;
 	update_per_cpu_data_slice_size(true, cpu);
+	setup_pcp_cacheinfo();
 	return 0;
 err:
 	free_cache_attributes(cpu);
@@ -963,6 +964,7 @@ static int cacheinfo_cpu_pre_down(unsigned int cpu)
 
 	free_cache_attributes(cpu);
 	update_per_cpu_data_slice_size(false, cpu);
+	setup_pcp_cacheinfo();
 	return 0;
 }
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 665f06675c834..665edc11fb9fa 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -325,6 +325,7 @@ void drain_all_pages(struct zone *zone);
 void drain_local_pages(struct zone *zone);
 
 void page_alloc_init_late(void);
+void setup_pcp_cacheinfo(void);
 
 /*
  * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index de313f1c15f99..efe72b3f78723 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -680,8 +680,14 @@ enum zone_watermarks {
  * PCPF_PREV_FREE_HIGH_ORDER: a high-order page is freed in the
  * previous page freeing.  To avoid to drain PCP for an accident
  * high-order page freeing.
+ *
+ * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in PCP before
+ * draining PCP for consecutive high-order pages freeing without
+ * allocation if data cache slice of CPU is large enough.  To reduce
+ * zone lock contention and keep cache-hot pages reusing.
  */
 #define	PCPF_PREV_FREE_HIGH_ORDER	BIT(0)
+#define	PCPF_FREE_HIGH_BATCH		BIT(1)
 
 struct per_cpu_pages {
 	spinlock_t lock;	/* Protects lists field */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index de547ef9a9adc..b76b1de48a307 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -52,6 +52,7 @@
 #include <linux/psi.h>
 #include <linux/khugepaged.h>
 #include <linux/delayacct.h>
+#include <linux/cacheinfo.h>
 #include <asm/div64.h>
 #include "internal.h"
 #include "shuffle.h"
@@ -2385,7 +2386,9 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
 	 */
 	if (order && order <= PAGE_ALLOC_COSTLY_ORDER) {
 		free_high = (pcp->free_factor &&
-			     (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER));
+			     (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) &&
+			     (!(pcp->flags & PCPF_FREE_HIGH_BATCH) ||
+			      pcp->count >= READ_ONCE(pcp->batch)));
 		pcp->flags |= PCPF_PREV_FREE_HIGH_ORDER;
 	} else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) {
 		pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
@@ -5418,6 +5421,39 @@ static void zone_pcp_update(struct zone *zone, int cpu_online)
 	mutex_unlock(&pcp_batch_high_lock);
 }
 
+static void zone_pcp_update_cacheinfo(struct zone *zone)
+{
+	int cpu;
+	struct per_cpu_pages *pcp;
+	struct cpu_cacheinfo *cci;
+
+	for_each_online_cpu(cpu) {
+		pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+		cci = get_cpu_cacheinfo(cpu);
+		/*
+		 * If data cache slice of CPU is large enough, "pcp->batch"
+		 * pages can be preserved in PCP before draining PCP for
+		 * consecutive high-order pages freeing without allocation.
+		 * This can reduce zone lock contention without hurting
+		 * cache-hot pages sharing.
+		 */
+		spin_lock(&pcp->lock);
+		if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
+			pcp->flags |= PCPF_FREE_HIGH_BATCH;
+		else
+			pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
+		spin_unlock(&pcp->lock);
+	}
+}
+
+void setup_pcp_cacheinfo(void)
+{
+	struct zone *zone;
+
+	for_each_populated_zone(zone)
+		zone_pcp_update_cacheinfo(zone);
+}
+
 /*
  * Allocate per cpu pagesets and initialize them.
  * Before this call only boot pagesets were available.
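
For review context, below is a minimal userspace sketch of the two decisions the patch adds: the cache-size check done in zone_pcp_update_cacheinfo() and the extended free_high condition in free_unref_page_commit(). The pcp_model struct, the model_*() helpers, and the sizes used in main() are made-up illustrations, not kernel code.

/* Illustrative userspace sketch (assumed names), mirroring the patch logic. */
#include <stdio.h>
#include <stdbool.h>

#define PAGE_SHIFT			12
#define PCPF_PREV_FREE_HIGH_ORDER	(1 << 0)
#define PCPF_FREE_HIGH_BATCH		(1 << 1)

struct pcp_model {
	unsigned int flags;
	int count;		/* pages currently held on the PCP lists */
	int batch;		/* pcp->batch */
	int free_factor;	/* non-zero after recent frees */
};

/*
 * Mirror of zone_pcp_update_cacheinfo(): enable batch preservation only
 * when the per-CPU data cache slice can hold more than 3 * batch pages.
 */
static void model_update_cacheinfo(struct pcp_model *pcp,
				   unsigned int data_slice_size)
{
	if ((data_slice_size >> PAGE_SHIFT) > 3u * pcp->batch)
		pcp->flags |= PCPF_FREE_HIGH_BATCH;
	else
		pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
}

/*
 * Mirror of the free_high condition in free_unref_page_commit(): with
 * PCPF_FREE_HIGH_BATCH set, draining is deferred until at least
 * pcp->batch pages have accumulated on the PCP.
 */
static bool model_free_high(const struct pcp_model *pcp)
{
	return pcp->free_factor &&
	       (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) &&
	       (!(pcp->flags & PCPF_FREE_HIGH_BATCH) ||
		pcp->count >= pcp->batch);
}

int main(void)
{
	struct pcp_model pcp = {
		.flags = PCPF_PREV_FREE_HIGH_ORDER,
		.count = 32,
		.batch = 63,
		.free_factor = 1,
	};

	model_update_cacheinfo(&pcp, 2u << 20);		/* 2 MB data cache slice */
	printf("free_high = %d\n", model_free_high(&pcp));	/* 0: keep pages in PCP */

	pcp.count = 64;						/* batch pages accumulated */
	printf("free_high = %d\n", model_free_high(&pcp));	/* 1: drain to buddy */
	return 0;
}

With a 2 MB data cache slice (512 pages) and batch = 63, the slice exceeds 3 * batch, so PCPF_FREE_HIGH_BATCH is set: the first call reports free_high = 0 and the cache-hot pages stay on the PCP, while the second call, once pcp->count reaches the batch size, reports 1 and the pages are drained to the buddy allocator.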