From 0afdeeed08b5f59148069e9f7a52d2ab0eba5d49 Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Tue, 18 Jun 2019 15:05:12 +0200 Subject: [PATCH 01/12] net: page_pool: add helper function to retrieve dma addresses On a previous patch dma addr was stored in 'struct page'. Use that to retrieve DMA addresses used by network drivers Signed-off-by: Ilias Apalodimas Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/page_pool.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 694d055e01efe..b885d86cb7a1d 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -132,6 +132,11 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, __page_pool_put_page(pool, page, true); } +static inline dma_addr_t page_pool_get_dma_addr(struct page *page) +{ + return page->dma_addr; +} + static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL From a25d50bfe645b3ed6b2cb3773e7025db14a608f3 Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Tue, 18 Jun 2019 15:05:17 +0200 Subject: [PATCH 02/12] net: page_pool: add helper function to unmap dma addresses On a previous patch dma addr was stored in 'struct page'. Use that to unmap DMA addresses used by network drivers Signed-off-by: Ilias Apalodimas Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/page_pool.h | 1 + net/core/page_pool.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index b885d86cb7a1d..ad218cef88c54 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -110,6 +110,7 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) struct page_pool *page_pool_create(const struct page_pool_params *params); void page_pool_destroy(struct page_pool *pool); +void page_pool_unmap_page(struct page_pool *pool, struct page *page); /* Never call this directly, use helpers below */ void __page_pool_put_page(struct page_pool *pool, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5b2252c6d49ba..205af7bd6d098 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -190,6 +190,13 @@ static void __page_pool_clean_page(struct page_pool *pool, page->dma_addr = 0; } +/* unmap the page and clean our state */ +void page_pool_unmap_page(struct page_pool *pool, struct page *page) +{ + __page_pool_clean_page(pool, page); +} +EXPORT_SYMBOL(page_pool_unmap_page); + /* Return a page to the page allocator, cleaning up our state */ static void __page_pool_return_page(struct page_pool *pool, struct page *page) { From 516a7593fda6f20a04988e988725a182644f67b4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:22 +0200 Subject: [PATCH 03/12] xdp: fix leak of IDA cyclic id if rhashtable_insert_slow fails Fix error handling case, where inserting ID with rhashtable_insert_slow fails in xdp_rxq_info_reg_mem_model, which leads to never releasing the IDA ID, as the lookup in xdp_rxq_info_unreg_mem_model fails and thus ida_simple_remove() is never called. Fix by releasing ID via ida_simple_remove(), and mark xdp_rxq->mem.id with zero, which is already checked in xdp_rxq_info_unreg_mem_model(). Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: David S. 
Miller --- net/core/xdp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/xdp.c b/net/core/xdp.c index 8aab08b131d92..1d5f2292962cb 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -301,6 +301,8 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, /* Insert allocator into ID lookup table */ ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); if (IS_ERR(ptr)) { + ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id); + xdp_rxq->mem.id = 0; errno = PTR_ERR(ptr); goto err; } From 6bf071bf09d4b2ff3ee8783531e2ce814f0870cb Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:27 +0200 Subject: [PATCH 04/12] xdp: page_pool related fix to cpumap When converting an xdp_frame into an SKB, and sending this into the network stack, then the underlying XDP memory model need to release associated resources, because the network stack don't have callbacks for XDP memory models. The only memory model that needs this is page_pool, when a driver use the DMA-mapping feature. Introduce page_pool_release_page(), which basically does the same as page_pool_unmap_page(). Add xdp_release_frame() as the XDP memory model interface for calling it, if the memory model match MEM_TYPE_PAGE_POOL, to save the function call overhead for others. Have cpumap call xdp_release_frame() before xdp_scrub_frame(). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/page_pool.h | 15 ++++++++++++++- include/net/xdp.h | 15 +++++++++++++++ kernel/bpf/cpumap.c | 3 +++ net/core/xdp.c | 15 +++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index ad218cef88c54..e240fac4c5b94 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -110,7 +110,6 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) struct page_pool *page_pool_create(const struct page_pool_params *params); void page_pool_destroy(struct page_pool *pool); -void page_pool_unmap_page(struct page_pool *pool, struct page *page); /* Never call this directly, use helpers below */ void __page_pool_put_page(struct page_pool *pool, @@ -133,6 +132,20 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, __page_pool_put_page(pool, page, true); } +/* Disconnects a page (from a page_pool). API users can have a need + * to disconnect a page (from a page_pool), to allow it to be used as + * a regular page (that will eventually be returned to the normal + * page-allocator via put_page). + */ +void page_pool_unmap_page(struct page_pool *pool, struct page *page); +static inline void page_pool_release_page(struct page_pool *pool, + struct page *page) +{ +#ifdef CONFIG_PAGE_POOL + page_pool_unmap_page(pool, page); +#endif +} + static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { return page->dma_addr; diff --git a/include/net/xdp.h b/include/net/xdp.h index 8e0deddef35cf..40c6d3398458f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -129,6 +129,21 @@ void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); +/* When sending xdp_frame into the network stack, then there is no + * return point callback, which is needed to release e.g. DMA-mapping + * resources with page_pool. Thus, have explicit function to release + * frame resources. 
+ */ +void __xdp_release_frame(void *data, struct xdp_mem_info *mem); +static inline void xdp_release_frame(struct xdp_frame *xdpf) +{ + struct xdp_mem_info *mem = &xdpf->mem; + + /* Curr only page_pool needs this */ + if (mem->type == MEM_TYPE_PAGE_POOL) + __xdp_release_frame(xdpf->data, mem); +} + int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8ee5532cf6a65..8dff087680879 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -208,6 +208,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * - RX ring dev queue index (skb_record_rx_queue) */ + /* Until page_pool get SKB return path, release DMA here */ + xdp_release_frame(xdpf); + /* Allow SKB to reuse area used by xdp_frame */ xdp_scrub_frame(xdpf); diff --git a/net/core/xdp.c b/net/core/xdp.c index 1d5f2292962cb..0fcc32340c4e9 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -381,6 +381,21 @@ void xdp_return_buff(struct xdp_buff *xdp) } EXPORT_SYMBOL_GPL(xdp_return_buff); +/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ +void __xdp_release_frame(void *data, struct xdp_mem_info *mem) +{ + struct xdp_mem_allocator *xa; + struct page *page; + + rcu_read_lock(); + xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); + page = virt_to_head_page(data); + if (xa) + page_pool_release_page(xa->page_pool, page); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(__xdp_release_frame); + int xdp_attachment_query(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { From cbf3351067cea1a58862b2c7667b479a358a2071 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:32 +0200 Subject: [PATCH 05/12] veth: use xdp_release_frame for XDP_PASS Like cpumap use xdp_release_frame() when an xdp_frame got converted into an SKB and send towars the network stack. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/veth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 52110e54e621f..c6916bf1017bc 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -547,6 +547,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, goto err; } + xdp_release_frame(frame); xdp_scrub_frame(frame); skb->protocol = eth_type_trans(skb, rq->dev); err: From e54cfd7e1745e52eb6c67ee9c77aefb8e4666a88 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:37 +0200 Subject: [PATCH 06/12] page_pool: introduce page_pool_free and use in mlx5 In case driver fails to register the page_pool with XDP return API (via xdp_rxq_info_reg_mem_model()), then the driver can free the page_pool resources more directly than calling page_pool_destroy(), which does a unnecessarily RCU free procedure. This patch is preparing for removing page_pool_destroy(), from driver invocation. Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++--- include/net/page_pool.h | 11 +++++++++++ net/core/page_pool.c | 15 +++++++++++---- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a8e8350b38aa0..46323709ad471 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -545,8 +545,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, } err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); - if (err) + if (err) { + page_pool_free(rq->page_pool); goto err_free; + } for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -611,8 +613,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); return err; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index e240fac4c5b94..754d980700dff 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -111,6 +111,17 @@ struct page_pool *page_pool_create(const struct page_pool_params *params); void page_pool_destroy(struct page_pool *pool); +void __page_pool_free(struct page_pool *pool); +static inline void page_pool_free(struct page_pool *pool) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + __page_pool_free(pool); +#endif +} + /* Never call this directly, use helpers below */ void __page_pool_put_page(struct page_pool *pool, struct page *page, bool allow_direct); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 205af7bd6d098..41391b5dc14c9 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -292,17 +292,24 @@ static void __page_pool_empty_ring(struct page_pool *pool) } } +void __page_pool_free(struct page_pool *pool) +{ + WARN(pool->alloc.count, "API usage violation"); + WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty"); + + ptr_ring_cleanup(&pool->ring, NULL); + kfree(pool); +} +EXPORT_SYMBOL(__page_pool_free); + static void __page_pool_destroy_rcu(struct rcu_head *rcu) { struct page_pool *pool; pool = container_of(rcu, struct page_pool, rcu); - WARN(pool->alloc.count, "API usage violation"); - __page_pool_empty_ring(pool); - ptr_ring_cleanup(&pool->ring, NULL); - kfree(pool); + __page_pool_free(pool); } /* Cleanup and release resources */ From 29b006a67634e389ebc1b0c45e72a84d60118d6f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:42 +0200 Subject: [PATCH 07/12] mlx5: more strict use of page_pool API The mlx5 driver is using page_pool, but not for DMA-mapping (currently), and is a little too relaxed about returning or releasing page resources, as it is not strictly necessary, when not using DMA-mappings. As this patchset is working towards tracking page_pool resources, to know about in-flight frames on shutdown. Then fix places where mlx5 leak page_pool resource. In case of dma_mapping_error, then recycle into page_pool. In mlx5e_free_rq() moved the page_pool_destroy() call to after the mlx5e_page_release() calls, as it is more correct. In mlx5e_page_release() when no recycle was requested, then release page from the page_pool, via page_pool_release_page(). 
Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 46323709ad471..46b6a47bd1e3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -625,10 +625,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); - xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); - switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); @@ -645,6 +641,11 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5e_page_release(rq, dma_info, false); } + + xdp_rxq_info_unreg(&rq->xdp_rxq); + if (rq->page_pool) + page_pool_destroy(rq->page_pool); + mlx5_wq_destroy(&rq->wq_ctrl); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 935bf62eddc1c..234a3fd399013 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -248,7 +248,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, PAGE_SIZE, rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(dma_info->page); + page_pool_recycle_direct(rq->page_pool, dma_info->page); dma_info->page = NULL; return -ENOMEM; } @@ -272,6 +272,7 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, page_pool_recycle_direct(rq->page_pool, dma_info->page); } else { mlx5e_page_dma_unmap(rq, dma_info); + page_pool_release_page(rq->page_pool, dma_info->page); put_page(dma_info->page); } } From 99c07c43c4ea0bc101331401a0fabfc51933c6a3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:47 +0200 Subject: [PATCH 08/12] xdp: tracking page_pool resources and safe removal This patch is needed before we can allow drivers to use page_pool for DMA-mappings. Today with page_pool and XDP return API, it is possible to remove the page_pool object (from rhashtable), while there are still in-flight packet-pages. This is safely handled via RCU and failed lookups in __xdp_return() fallback to call put_page(), when page_pool object is gone. In-case page is still DMA mapped, this will result in page note getting correctly DMA unmapped. To solve this, the page_pool is extended with tracking in-flight pages. And XDP disconnect system queries page_pool and waits, via workqueue, for all in-flight pages to be returned. To avoid killing performance when tracking in-flight pages, the implement use two (unsigned) counters, that in placed on different cache-lines, and can be used to deduct in-flight packets. This is done by mapping the unsigned "sequence" counters onto signed Two's complement arithmetic operations. This is e.g. used by kernel's time_after macros, described in kernel commit 1ba3aab3033b and 5a581b367b5, and also explained in RFC1982. The trick is these two incrementing counters only need to be read and compared, when checking if it's safe to free the page_pool structure. Which will only happen when driver have disconnected RX/alloc side. Thus, on a non-fast-path. 
It is chosen that page_pool tracking is also enabled for the non-DMA use-case, as this can be used for statistics later. After this patch, using page_pool requires more strict resource "release", e.g. via page_pool_release_page() that was introduced in this patchset, and previous patches implement/fix this more strict requirement. Drivers no-longer call page_pool_destroy(). Drivers already call xdp_rxq_info_unreg() which call xdp_rxq_info_unreg_mem_model(), which will attempt to disconnect the mem id, and if attempt fails schedule the disconnect for later via delayed workqueue. Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 3 - include/net/page_pool.h | 41 +++++++++--- net/core/page_pool.c | 62 +++++++++++++----- net/core/xdp.c | 65 +++++++++++++++++-- 4 files changed, 136 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 46b6a47bd1e3a..5e40db8f92e61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -643,9 +643,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) } xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); - mlx5_wq_destroy(&rq->wq_ctrl); } diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 754d980700dff..f09b3f1994e60 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -16,14 +16,16 @@ * page_pool_alloc_pages() call. Drivers should likely use * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). * - * If page_pool handles DMA mapping (use page->private), then API user - * is responsible for invoking page_pool_put_page() once. In-case of - * elevated refcnt, the DMA state is released, assuming other users of - * the page will eventually call put_page(). + * API keeps track of in-flight pages, in-order to let API user know + * when it is safe to dealloactor page_pool object. Thus, API users + * must make sure to call page_pool_release_page() when a page is + * "leaving" the page_pool. Or call page_pool_put_page() where + * appropiate. For maintaining correct accounting. * - * If no DMA mapping is done, then it can act as shim-layer that - * fall-through to alloc_page. As no state is kept on the page, the - * regular put_page() call is sufficient. + * API user must only call page_pool_put_page() once on a page, as it + * will either recycle the page, or in case of elevated refcnt, it + * will release the DMA mapping and in-flight state accounting. We + * hope to lift this requirement in the future. */ #ifndef _NET_PAGE_POOL_H #define _NET_PAGE_POOL_H @@ -66,9 +68,10 @@ struct page_pool_params { }; struct page_pool { - struct rcu_head rcu; struct page_pool_params p; + u32 pages_state_hold_cnt; + /* * Data structure for allocation side * @@ -96,6 +99,8 @@ struct page_pool { * TODO: Implement bulk return pages into this structure. 
*/ struct ptr_ring ring; + + atomic_t pages_state_release_cnt; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); @@ -109,8 +114,6 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) struct page_pool *page_pool_create(const struct page_pool_params *params); -void page_pool_destroy(struct page_pool *pool); - void __page_pool_free(struct page_pool *pool); static inline void page_pool_free(struct page_pool *pool) { @@ -143,6 +146,24 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, __page_pool_put_page(pool, page, true); } +/* API user MUST have disconnected alloc-side (not allowed to call + * page_pool_alloc_pages()) before calling this. The free-side can + * still run concurrently, to handle in-flight packet-pages. + * + * A request to shutdown can fail (with false) if there are still + * in-flight packet-pages. + */ +bool __page_pool_request_shutdown(struct page_pool *pool); +static inline bool page_pool_request_shutdown(struct page_pool *pool) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + return __page_pool_request_shutdown(pool); +#endif +} + /* Disconnects a page (from a page_pool). API users can have a need * to disconnect a page (from a page_pool), to allow it to be used as * a regular page (that will eventually be returned to the normal diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 41391b5dc14c9..8679e24fd6657 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -43,6 +43,8 @@ static int page_pool_init(struct page_pool *pool, if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; + atomic_set(&pool->pages_state_release_cnt, 0); + return 0; } @@ -151,6 +153,9 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, page->dma_addr = dma; skip_dma_map: + /* Track how many pages are held 'in-flight' */ + pool->pages_state_hold_cnt++; + /* When page just alloc'ed is should/must have refcnt 1. 
*/ return page; } @@ -173,6 +178,33 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) } EXPORT_SYMBOL(page_pool_alloc_pages); +/* Calculate distance between two u32 values, valid if distance is below 2^(31) + * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution + */ +#define _distance(a, b) (s32)((a) - (b)) + +static s32 page_pool_inflight(struct page_pool *pool) +{ + u32 release_cnt = atomic_read(&pool->pages_state_release_cnt); + u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt); + s32 distance; + + distance = _distance(hold_cnt, release_cnt); + + /* TODO: Add tracepoint here */ + return distance; +} + +static bool __page_pool_safe_to_destroy(struct page_pool *pool) +{ + s32 inflight = page_pool_inflight(pool); + + /* The distance should not be able to become negative */ + WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight); + + return (inflight == 0); +} + /* Cleanup page_pool state from page */ static void __page_pool_clean_page(struct page_pool *pool, struct page *page) @@ -180,7 +212,7 @@ static void __page_pool_clean_page(struct page_pool *pool, dma_addr_t dma; if (!(pool->p.flags & PP_FLAG_DMA_MAP)) - return; + goto skip_dma_unmap; dma = page->dma_addr; /* DMA unmap */ @@ -188,11 +220,16 @@ static void __page_pool_clean_page(struct page_pool *pool, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); page->dma_addr = 0; +skip_dma_unmap: + atomic_inc(&pool->pages_state_release_cnt); } /* unmap the page and clean our state */ void page_pool_unmap_page(struct page_pool *pool, struct page *page) { + /* When page is unmapped, this implies page will not be + * returned to page_pool. + */ __page_pool_clean_page(pool, page); } EXPORT_SYMBOL(page_pool_unmap_page); @@ -201,6 +238,7 @@ EXPORT_SYMBOL(page_pool_unmap_page); static void __page_pool_return_page(struct page_pool *pool, struct page *page) { __page_pool_clean_page(pool, page); + put_page(page); /* An optimization would be to call __free_pages(page, pool->p.order) * knowing page is not part of page-cache (thus avoiding a @@ -296,24 +334,17 @@ void __page_pool_free(struct page_pool *pool) { WARN(pool->alloc.count, "API usage violation"); WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty"); + WARN(!__page_pool_safe_to_destroy(pool), "still in-flight pages"); ptr_ring_cleanup(&pool->ring, NULL); kfree(pool); } EXPORT_SYMBOL(__page_pool_free); -static void __page_pool_destroy_rcu(struct rcu_head *rcu) -{ - struct page_pool *pool; - - pool = container_of(rcu, struct page_pool, rcu); - - __page_pool_empty_ring(pool); - __page_pool_free(pool); -} - -/* Cleanup and release resources */ -void page_pool_destroy(struct page_pool *pool) +/* Request to shutdown: release pages cached by page_pool, and check + * for in-flight pages + */ +bool __page_pool_request_shutdown(struct page_pool *pool) { struct page *page; @@ -331,7 +362,6 @@ void page_pool_destroy(struct page_pool *pool) */ __page_pool_empty_ring(pool); - /* An xdp_mem_allocator can still ref page_pool pointer */ - call_rcu(&pool->rcu, __page_pool_destroy_rcu); + return __page_pool_safe_to_destroy(pool); } -EXPORT_SYMBOL(page_pool_destroy); +EXPORT_SYMBOL(__page_pool_request_shutdown); diff --git a/net/core/xdp.c b/net/core/xdp.c index 0fcc32340c4e9..aae665ccee3f1 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -38,6 +38,7 @@ struct xdp_mem_allocator { }; struct rhash_head node; struct rcu_head rcu; + struct delayed_work defer_wq; }; static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) 
@@ -79,13 +80,13 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) xa = container_of(rcu, struct xdp_mem_allocator, rcu); + /* Allocator have indicated safe to remove before this is called */ + if (xa->mem.type == MEM_TYPE_PAGE_POOL) + page_pool_free(xa->page_pool); + /* Allow this ID to be reused */ ida_simple_remove(&mem_id_pool, xa->mem.id); - /* Notice, driver is expected to free the *allocator, - * e.g. page_pool, and MUST also use RCU free. - */ - /* Poison memory */ xa->mem.id = 0xFFFF; xa->mem.type = 0xF0F0; @@ -94,6 +95,46 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) kfree(xa); } +bool __mem_id_disconnect(int id) +{ + struct xdp_mem_allocator *xa; + bool safe_to_remove = true; + + mutex_lock(&mem_id_lock); + + xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); + if (!xa) { + mutex_unlock(&mem_id_lock); + WARN(1, "Request remove non-existing id(%d), driver bug?", id); + return true; + } + + /* Detects in-flight packet-pages for page_pool */ + if (xa->mem.type == MEM_TYPE_PAGE_POOL) + safe_to_remove = page_pool_request_shutdown(xa->page_pool); + + if (safe_to_remove && + !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) + call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); + + mutex_unlock(&mem_id_lock); + return safe_to_remove; +} + +#define DEFER_TIME (msecs_to_jiffies(1000)) + +static void mem_id_disconnect_defer_retry(struct work_struct *wq) +{ + struct delayed_work *dwq = to_delayed_work(wq); + struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq); + + if (__mem_id_disconnect(xa->mem.id)) + return; + + /* Still not ready to be disconnected, retry later */ + schedule_delayed_work(&xa->defer_wq, DEFER_TIME); +} + void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) { struct xdp_mem_allocator *xa; @@ -112,16 +153,28 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) if (id == 0) return; + if (__mem_id_disconnect(id)) + return; + + /* Could not disconnect, defer new disconnect attempt to later */ mutex_lock(&mem_id_lock); xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); - if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) - call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); + if (!xa) { + mutex_unlock(&mem_id_lock); + return; + } + INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry); mutex_unlock(&mem_id_lock); + schedule_delayed_work(&xa->defer_wq, DEFER_TIME); } EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); +/* This unregister operation will also cleanup and destroy the + * allocator. The page_pool_free() operation is first called when it's + * safe to remove, possibly deferred to a workqueue. + */ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) { /* Simplify driver cleanup code paths, allow unreg "unused" */ From d956a048cd3fc1ba154101a1a50fb37950081ff6 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:53 +0200 Subject: [PATCH 09/12] xdp: force mem allocator removal and periodic warning If bugs exists or are introduced later e.g. by drivers misusing the API, then we want to warn about the issue, such that developer notice. This patch will generate a bit of noise in form of periodic pr_warn every 30 seconds. It is not nice to have this stall warning running forever. Thus, this patch will (after 120 attempts) force disconnect the mem id (from the rhashtable) and free the page_pool object. 
This will cause fallback to the put_page() as before, which only potentially leak DMA-mappings, if objects are really stuck for this long. In that unlikely case, a WARN_ONCE should show us the call stack. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/page_pool.c | 18 +++++++++++++++++- net/core/xdp.c | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 8679e24fd6657..42c3b0a5a2599 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -330,11 +330,27 @@ static void __page_pool_empty_ring(struct page_pool *pool) } } +static void __warn_in_flight(struct page_pool *pool) +{ + u32 release_cnt = atomic_read(&pool->pages_state_release_cnt); + u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt); + s32 distance; + + distance = _distance(hold_cnt, release_cnt); + + /* Drivers should fix this, but only problematic when DMA is used */ + WARN(1, "Still in-flight pages:%d hold:%u released:%u", + distance, hold_cnt, release_cnt); +} + void __page_pool_free(struct page_pool *pool) { WARN(pool->alloc.count, "API usage violation"); WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty"); - WARN(!__page_pool_safe_to_destroy(pool), "still in-flight pages"); + + /* Can happen due to forced shutdown */ + if (!__page_pool_safe_to_destroy(pool)) + __warn_in_flight(pool); ptr_ring_cleanup(&pool->ring, NULL); kfree(pool); diff --git a/net/core/xdp.c b/net/core/xdp.c index aae665ccee3f1..622c81dc7ba8e 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -39,6 +39,9 @@ struct xdp_mem_allocator { struct rhash_head node; struct rcu_head rcu; struct delayed_work defer_wq; + unsigned long defer_start; + unsigned long defer_warn; + int disconnect_cnt; }; static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) @@ -95,7 +98,7 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) kfree(xa); } -bool __mem_id_disconnect(int id) +bool __mem_id_disconnect(int id, bool force) { struct xdp_mem_allocator *xa; bool safe_to_remove = true; @@ -108,29 +111,47 @@ bool __mem_id_disconnect(int id) WARN(1, "Request remove non-existing id(%d), driver bug?", id); return true; } + xa->disconnect_cnt++; /* Detects in-flight packet-pages for page_pool */ if (xa->mem.type == MEM_TYPE_PAGE_POOL) safe_to_remove = page_pool_request_shutdown(xa->page_pool); - if (safe_to_remove && + /* TODO: Tracepoint will be added here in next-patch */ + + if ((safe_to_remove || force) && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); mutex_unlock(&mem_id_lock); - return safe_to_remove; + return (safe_to_remove|force); } #define DEFER_TIME (msecs_to_jiffies(1000)) +#define DEFER_WARN_INTERVAL (30 * HZ) +#define DEFER_MAX_RETRIES 120 static void mem_id_disconnect_defer_retry(struct work_struct *wq) { struct delayed_work *dwq = to_delayed_work(wq); struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq); + bool force = false; + + if (xa->disconnect_cnt > DEFER_MAX_RETRIES) + force = true; - if (__mem_id_disconnect(xa->mem.id)) + if (__mem_id_disconnect(xa->mem.id, force)) return; + /* Periodic warning */ + if (time_after_eq(jiffies, xa->defer_warn)) { + int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ; + + pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n", + __func__, xa->mem.id, xa->disconnect_cnt, sec); + xa->defer_warn = jiffies + DEFER_WARN_INTERVAL; + } + /* Still not ready to be 
disconnected, retry later */ schedule_delayed_work(&xa->defer_wq, DEFER_TIME); } @@ -153,7 +174,7 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) if (id == 0) return; - if (__mem_id_disconnect(id)) + if (__mem_id_disconnect(id, false)) return; /* Could not disconnect, defer new disconnect attempt to later */ @@ -164,6 +185,8 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) mutex_unlock(&mem_id_lock); return; } + xa->defer_start = jiffies; + xa->defer_warn = jiffies + DEFER_WARN_INTERVAL; INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry); mutex_unlock(&mem_id_lock); @@ -388,10 +411,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); page = virt_to_head_page(data); - if (xa) { + if (likely(xa)) { napi_direct &= !xdp_return_frame_no_direct(); page_pool_put_page(xa->page_pool, page, napi_direct); } else { + /* Hopefully stack show who to blame for late return */ + WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id); put_page(page); } rcu_read_unlock(); From f033b688c1ede5ec78c9a718fa9f0b374049bc31 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:58 +0200 Subject: [PATCH 10/12] xdp: add tracepoints for XDP mem These tracepoints make it easier to troubleshoot XDP mem id disconnect. The xdp:mem_disconnect tracepoint cannot be replaced via kprobe. It is placed at the last stable place for the pointer to struct xdp_mem_allocator, just before it's scheduled for RCU removal. It also extract info on 'safe_to_remove' and 'force'. Detailed info about in-flight pages is not available at this layer. The next patch will added tracepoints needed at the page_pool layer for this. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. 
Miller --- include/net/xdp_priv.h | 23 ++++++++ include/trace/events/xdp.h | 115 +++++++++++++++++++++++++++++++++++++ net/core/xdp.c | 21 ++----- 3 files changed, 143 insertions(+), 16 deletions(-) create mode 100644 include/net/xdp_priv.h diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h new file mode 100644 index 0000000000000..6a8cba6ea79a5 --- /dev/null +++ b/include/net/xdp_priv.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_NET_XDP_PRIV_H__ +#define __LINUX_NET_XDP_PRIV_H__ + +#include + +/* Private to net/core/xdp.c, but used by trace/events/xdp.h */ +struct xdp_mem_allocator { + struct xdp_mem_info mem; + union { + void *allocator; + struct page_pool *page_pool; + struct zero_copy_allocator *zc_alloc; + }; + int disconnect_cnt; + unsigned long defer_start; + struct rhash_head node; + struct rcu_head rcu; + struct delayed_work defer_wq; + unsigned long defer_warn; +}; + +#endif /* __LINUX_NET_XDP_PRIV_H__ */ diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index e95cb86b65cf5..bb5e380e2ef32 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -269,6 +269,121 @@ TRACE_EVENT(xdp_devmap_xmit, __entry->from_ifindex, __entry->to_ifindex, __entry->err) ); +/* Expect users already include , but not xdp_priv.h */ +#include + +#define __MEM_TYPE_MAP(FN) \ + FN(PAGE_SHARED) \ + FN(PAGE_ORDER0) \ + FN(PAGE_POOL) \ + FN(ZERO_COPY) + +#define __MEM_TYPE_TP_FN(x) \ + TRACE_DEFINE_ENUM(MEM_TYPE_##x); +#define __MEM_TYPE_SYM_FN(x) \ + { MEM_TYPE_##x, #x }, +#define __MEM_TYPE_SYM_TAB \ + __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 } +__MEM_TYPE_MAP(__MEM_TYPE_TP_FN) + +TRACE_EVENT(mem_disconnect, + + TP_PROTO(const struct xdp_mem_allocator *xa, + bool safe_to_remove, bool force), + + TP_ARGS(xa, safe_to_remove, force), + + TP_STRUCT__entry( + __field(const struct xdp_mem_allocator *, xa) + __field(u32, mem_id) + __field(u32, mem_type) + __field(const void *, allocator) + __field(bool, safe_to_remove) + __field(bool, force) + __field(int, disconnect_cnt) + ), + + TP_fast_assign( + __entry->xa = xa; + __entry->mem_id = xa->mem.id; + __entry->mem_type = xa->mem.type; + __entry->allocator = xa->allocator; + __entry->safe_to_remove = safe_to_remove; + __entry->force = force; + __entry->disconnect_cnt = xa->disconnect_cnt; + ), + + TP_printk("mem_id=%d mem_type=%s allocator=%p" + " safe_to_remove=%s force=%s disconnect_cnt=%d", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->allocator, + __entry->safe_to_remove ? "true" : "false", + __entry->force ? 
"true" : "false", + __entry->disconnect_cnt + ) +); + +TRACE_EVENT(mem_connect, + + TP_PROTO(const struct xdp_mem_allocator *xa, + const struct xdp_rxq_info *rxq), + + TP_ARGS(xa, rxq), + + TP_STRUCT__entry( + __field(const struct xdp_mem_allocator *, xa) + __field(u32, mem_id) + __field(u32, mem_type) + __field(const void *, allocator) + __field(const struct xdp_rxq_info *, rxq) + __field(int, ifindex) + ), + + TP_fast_assign( + __entry->xa = xa; + __entry->mem_id = xa->mem.id; + __entry->mem_type = xa->mem.type; + __entry->allocator = xa->allocator; + __entry->rxq = rxq; + __entry->ifindex = rxq->dev->ifindex; + ), + + TP_printk("mem_id=%d mem_type=%s allocator=%p" + " ifindex=%d", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->allocator, + __entry->ifindex + ) +); + +TRACE_EVENT(mem_return_failed, + + TP_PROTO(const struct xdp_mem_info *mem, + const struct page *page), + + TP_ARGS(mem, page), + + TP_STRUCT__entry( + __field(const struct page *, page) + __field(u32, mem_id) + __field(u32, mem_type) + ), + + TP_fast_assign( + __entry->page = page; + __entry->mem_id = mem->id; + __entry->mem_type = mem->type; + ), + + TP_printk("mem_id=%d mem_type=%s page=%p", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->page + ) +); + #endif /* _TRACE_XDP_H */ #include diff --git a/net/core/xdp.c b/net/core/xdp.c index 622c81dc7ba8e..b29d7b513a181 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -14,6 +14,8 @@ #include #include +#include /* struct xdp_mem_allocator */ +#include #define REG_STATE_NEW 0x0 #define REG_STATE_REGISTERED 0x1 @@ -29,21 +31,6 @@ static int mem_id_next = MEM_ID_MIN; static bool mem_id_init; /* false */ static struct rhashtable *mem_id_ht; -struct xdp_mem_allocator { - struct xdp_mem_info mem; - union { - void *allocator; - struct page_pool *page_pool; - struct zero_copy_allocator *zc_alloc; - }; - struct rhash_head node; - struct rcu_head rcu; - struct delayed_work defer_wq; - unsigned long defer_start; - unsigned long defer_warn; - int disconnect_cnt; -}; - static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) { const u32 *k = data; @@ -117,7 +104,7 @@ bool __mem_id_disconnect(int id, bool force) if (xa->mem.type == MEM_TYPE_PAGE_POOL) safe_to_remove = page_pool_request_shutdown(xa->page_pool); - /* TODO: Tracepoint will be added here in next-patch */ + trace_mem_disconnect(xa, safe_to_remove, force); if ((safe_to_remove || force) && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) @@ -385,6 +372,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, mutex_unlock(&mem_id_lock); + trace_mem_connect(xdp_alloc, xdp_rxq); return 0; err: mutex_unlock(&mem_id_lock); @@ -417,6 +405,7 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, } else { /* Hopefully stack show who to blame for late return */ WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id); + trace_mem_return_failed(mem, page); put_page(page); } rcu_read_unlock(); From 32c28f7e413981c7dd4a3ad9bbb1151e4b654261 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:06:03 +0200 Subject: [PATCH 11/12] page_pool: add tracepoints for page_pool with details need by XDP The xdp tracepoints for mem id disconnect don't carry information about, why it was not safe_to_remove. The tracepoint page_pool:page_pool_inflight in this patch can be used for extract this info for further debugging. 
This patchset also adds tracepoint for the pages_state_* release/hold transitions, including a pointer to the page. This can be used for stats about in-flight pages, or used to debug page leakage via keeping track of page pointer and combining this with kprobe for __put_page(). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/page_pool.h | 87 ++++++++++++++++++++++++++++++++ net/core/net-traces.c | 4 ++ net/core/page_pool.c | 9 +++- 3 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/page_pool.h diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h new file mode 100644 index 0000000000000..47b5ee880aa9f --- /dev/null +++ b/include/trace/events/page_pool.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM page_pool + +#if !defined(_TRACE_PAGE_POOL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_POOL_H + +#include +#include + +#include + +TRACE_EVENT(page_pool_inflight, + + TP_PROTO(const struct page_pool *pool, + s32 inflight, u32 hold, u32 release), + + TP_ARGS(pool, inflight, hold, release), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(s32, inflight) + __field(u32, hold) + __field(u32, release) + ), + + TP_fast_assign( + __entry->pool = pool; + __entry->inflight = inflight; + __entry->hold = hold; + __entry->release = release; + ), + + TP_printk("page_pool=%p inflight=%d hold=%u release=%u", + __entry->pool, __entry->inflight, __entry->hold, __entry->release) +); + +TRACE_EVENT(page_pool_state_release, + + TP_PROTO(const struct page_pool *pool, + const struct page *page, u32 release), + + TP_ARGS(pool, page, release), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(const struct page *, page) + __field(u32, release) + ), + + TP_fast_assign( + __entry->pool = pool; + __entry->page = page; + __entry->release = release; + ), + + TP_printk("page_pool=%p page=%p release=%u", + __entry->pool, __entry->page, __entry->release) +); + +TRACE_EVENT(page_pool_state_hold, + + TP_PROTO(const struct page_pool *pool, + const struct page *page, u32 hold), + + TP_ARGS(pool, page, hold), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(const struct page *, page) + __field(u32, hold) + ), + + TP_fast_assign( + __entry->pool = pool; + __entry->page = page; + __entry->hold = hold; + ), + + TP_printk("page_pool=%p page=%p hold=%u", + __entry->pool, __entry->page, __entry->hold) +); + +#endif /* _TRACE_PAGE_POOL_H */ + +/* This part must be outside protection */ +#include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 470b179d599ec..283ddb2dbc7d3 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -43,6 +43,10 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); #endif +#if IS_ENABLED(CONFIG_PAGE_POOL) +#include +#endif + #include EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 42c3b0a5a2599..f55ab055d543d 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -4,6 +4,7 @@ * Author: Jesper Dangaard Brouer * Copyright (C) 2016 Red Hat, Inc. 
*/ + #include #include #include @@ -14,6 +15,8 @@ #include #include /* for __put_page() */ +#include + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -156,6 +159,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; + trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt); + /* When page just alloc'ed is should/must have refcnt 1. */ return page; } @@ -191,7 +196,7 @@ static s32 page_pool_inflight(struct page_pool *pool) distance = _distance(hold_cnt, release_cnt); - /* TODO: Add tracepoint here */ + trace_page_pool_inflight(pool, distance, hold_cnt, release_cnt); return distance; } @@ -222,6 +227,8 @@ static void __page_pool_clean_page(struct page_pool *pool, page->dma_addr = 0; skip_dma_unmap: atomic_inc(&pool->pages_state_release_cnt); + trace_page_pool_state_release(pool, page, + atomic_read(&pool->pages_state_release_cnt)); } /* unmap the page and clean our state */ From f71fec47c2df704c7081f946d7e46fe036a4208b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:06:08 +0200 Subject: [PATCH 12/12] page_pool: make sure struct device is stable For DMA mapping use-case the page_pool keeps a pointer to the struct device, which is used in DMA map/unmap calls. For our in-flight handling, we also need to make sure that the struct device have not disappeared. This is assured via using get_device/put_device API. Signed-off-by: Jesper Dangaard Brouer Reported-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- net/core/page_pool.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f55ab055d543d..b366f59885c1c 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,9 @@ static int page_pool_init(struct page_pool *pool, atomic_set(&pool->pages_state_release_cnt, 0); + if (pool->p.flags & PP_FLAG_DMA_MAP) + get_device(pool->p.dev); + return 0; } @@ -360,6 +364,10 @@ void __page_pool_free(struct page_pool *pool) __warn_in_flight(pool); ptr_ring_cleanup(&pool->ring, NULL); + + if (pool->p.flags & PP_FLAG_DMA_MAP) + put_device(pool->p.dev); + kfree(pool); } EXPORT_SYMBOL(__page_pool_free);
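
To make the driver-facing contract established by this series concrete, below is a minimal, illustrative sketch (not taken from any of the patches above) of how an RX queue might use the reworked API: page_pool does the DMA mapping, pages that leave the pool go through page_pool_release_page(), and teardown relies on xdp_rxq_info_unreg() rather than a direct page_pool_destroy() call. All "my_"-prefixed names are hypothetical, and the page_pool_params field names are assumed from the kernel version this series targets.

/*
 * Illustrative driver-side sketch only -- not part of the patch series.
 * Assumes the rxq has already been registered via xdp_rxq_info_reg().
 */
#include <linux/err.h>
#include <linux/dma-mapping.h>
#include <net/page_pool.h>
#include <net/xdp.h>

struct my_rq {
	struct device *dev;
	struct page_pool *page_pool;
	struct xdp_rxq_info xdp_rxq;
};

static int my_rq_init_page_pool(struct my_rq *rq, u32 pool_size, int nid)
{
	/* Field names assumed per struct page_pool_params at this point in time */
	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP,	/* let page_pool DMA-map pages */
		.order		= 0,
		.pool_size	= pool_size,
		.nid		= nid,
		.dev		= rq->dev,
		.dma_dir	= DMA_FROM_DEVICE,
	};
	int err;

	rq->page_pool = page_pool_create(&pp_params);
	if (IS_ERR(rq->page_pool))
		return PTR_ERR(rq->page_pool);

	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL,
					 rq->page_pool);
	if (err) {
		/* Not yet registered with the XDP return API: free the pool
		 * directly (as mlx5 does in patch 06), not page_pool_destroy().
		 */
		page_pool_free(rq->page_pool);
		return err;
	}
	return 0;
}

/* RX refill: the pool already holds the DMA mapping for the page */
static struct page *my_rq_get_rx_page(struct my_rq *rq, dma_addr_t *dma)
{
	struct page *page = page_pool_dev_alloc_pages(rq->page_pool);

	if (!page)
		return NULL;
	*dma = page_pool_get_dma_addr(page);	/* helper from patch 01 */
	return page;
}

/* Page is converted to an SKB and leaves the pool: unmap and account it */
static void my_rq_give_page_to_stack(struct my_rq *rq, struct page *page)
{
	page_pool_release_page(rq->page_pool, page);
	/* The stack now owns the page; it is eventually freed via put_page() */
}

static void my_rq_free(struct my_rq *rq)
{
	/* Return any pages the driver still holds first, e.g. via
	 * page_pool_recycle_direct(rq->page_pool, page), then:
	 */
	xdp_rxq_info_unreg(&rq->xdp_rxq);
	/* No page_pool_destroy(): the unreg above requests shutdown, and the
	 * pool is freed once the in-flight page count drops to zero (possibly
	 * deferred to the workqueue added in patch 08).
	 */
}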