diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst index 30f1344e7ccaf..873efd97f8228 100644 --- a/Documentation/networking/page_pool.rst +++ b/Documentation/networking/page_pool.rst @@ -165,6 +165,7 @@ Registration pp_params.pool_size = DESC_NUM; pp_params.nid = NUMA_NO_NODE; pp_params.dev = priv->dev; + pp_params.napi = napi; /* only if locking is tied to NAPI */ pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; page_pool = page_pool_create(&pp_params); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 96d27d558b0c7..203c0df2046cb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -360,8 +360,11 @@ struct napi_struct { unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL + /* CPU actively polling if netpoll is configured */ int poll_owner; #endif + /* CPU on which NAPI has been scheduled for processing */ + int list_owner; struct net_device *dev; struct gro_list gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 494a23a976b07..a823ec3aa3262 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3386,6 +3386,18 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } +static inline void +napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) +{ + struct page *page = skb_frag_page(frag); + +#ifdef CONFIG_PAGE_POOL + if (recycle && page_pool_return_skb_page(page, napi_safe)) + return; +#endif + put_page(page); +} + /** * __skb_frag_unref - release a reference on a paged fragment. * @frag: the paged fragment @@ -3396,13 +3408,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) */ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) { - struct page *page = skb_frag_page(frag); - -#ifdef CONFIG_PAGE_POOL - if (recycle && page_pool_return_skb_page(page)) - return; -#endif - put_page(page); + napi_frag_unref(frag, recycle, false); } /** diff --git a/include/net/page_pool.h b/include/net/page_pool.h index ddfa0b3286777..91b808dade820 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -77,6 +77,7 @@ struct page_pool_params { unsigned int pool_size; int nid; /* Numa node id to allocate from pages from */ struct device *dev; /* device, for DMA pre-mapping purposes */ + struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */ enum dma_data_direction dma_dir; /* DMA mapping direction */ unsigned int max_len; /* max DMA sync memory size */ unsigned int offset; /* DMA addr offset */ @@ -239,7 +240,7 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) return pool->p.dma_dir; } -bool page_pool_return_skb_page(struct page *page); +bool page_pool_return_skb_page(struct page *page, bool napi_safe); struct page_pool *page_pool_create(const struct page_pool_params *params); diff --git a/net/core/dev.c b/net/core/dev.c index c7f13742b56cc..8aea68275172c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4359,6 +4359,7 @@ static inline void ____napi_schedule(struct softnet_data *sd, } list_add_tail(&napi->poll_list, &sd->poll_list); + WRITE_ONCE(napi->list_owner, smp_processor_id()); /* If not called from net_rx_action() * we have to raise NET_RX_SOFTIRQ. */ @@ -6069,6 +6070,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) list_del_init(&n->poll_list); local_irq_restore(flags); } + WRITE_ONCE(n->list_owner, -1); val = READ_ONCE(n->state); do { @@ -6384,6 +6386,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif + napi->list_owner = -1; set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 193c187998650..2f6bf422ed303 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -19,6 +19,7 @@ #include /* for put_page() */ #include #include +#include #include @@ -874,9 +875,11 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) } EXPORT_SYMBOL(page_pool_update_nid); -bool page_pool_return_skb_page(struct page *page) +bool page_pool_return_skb_page(struct page *page, bool napi_safe) { + struct napi_struct *napi; struct page_pool *pp; + bool allow_direct; page = compound_head(page); @@ -892,12 +895,20 @@ bool page_pool_return_skb_page(struct page *page) pp = page->pp; + /* Allow direct recycle if we have reasons to believe that we are + * in the same context as the consumer would run, so there's + * no possible race. + */ + napi = pp->p.napi; + allow_direct = napi_safe && napi && + READ_ONCE(napi->list_owner) == smp_processor_id(); + /* Driver set this to memory recycling info. Reset it on recycle. * This will *not* work for NIC using a split-page memory model. * The page will be returned to the pool here regardless of the * 'flipped' fragment being in use or not. */ - page_pool_put_full_page(pp, page, false); + page_pool_put_full_page(pp, page, allow_direct); return true; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2b5a98c5cb495..ef81452759be3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -843,7 +843,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe) { if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) return false; - return page_pool_return_skb_page(virt_to_page(data)); + return page_pool_return_skb_page(virt_to_page(data), napi_safe); } static void skb_kfree_head(void *head, unsigned int end_offset) @@ -889,7 +889,7 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason, } for (i = 0; i < shinfo->nr_frags; i++) - __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle); + napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe); free_head: if (shinfo->frag_list)