Merge branch 'xdp-page_pool-fixes-and-in-flight-accounting'
Jesper Dangaard Brouer says:

====================
xdp: page_pool fixes and in-flight accounting

This patchset fixes the page_pool API and its users, such that drivers can
use it for DMA mapping. A number of places existed where the DMA mapping
would not get released/unmapped; all of these are fixed. This occurs e.g.
when an xdp_frame gets converted to an SKB, as the network stack doesn't
have any callback for XDP memory models.

The patchset also addresses a shutdown race condition. Today, removing an
XDP memory model based on page_pool is only delayed by one RCU grace period.
This isn't enough, as redirected xdp_frames can still be in flight on
different queues (remote driver TX, cpumap or veth).

We stress that when drivers use page_pool for DMA mapping, they MUST use
one packet per page. This might change in the future, but more work lies
ahead before we can lift this restriction.

This patchset changes the page_pool API to be more strict, as in-flight
page accounting is added.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Jun 19, 2019
2 parents 9371a56 + f71fec4 commit 2a54003
Showing 12 changed files with 502 additions and 45 deletions.
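
The in-flight accounting described in the cover letter comes down to two counters this series adds to struct page_pool (see the include/net/page_pool.h hunk below): an alloc-side hold counter and a return-side release counter. A minimal sketch of how the in-flight number is derived from them, assuming those field names (the helper name is illustrative; the real calculation lives in net/core/page_pool.c):

static s32 example_page_pool_inflight(const struct page_pool *pool)
{
	u32 hold_cnt	= READ_ONCE(pool->pages_state_hold_cnt);
	u32 release_cnt	= atomic_read(&pool->pages_state_release_cnt);

	/* Signed distance copes with u32 wrap-around; a positive result
	 * means pages are still out in the wild and shutdown must wait.
	 */
	return (s32)(hold_cnt - release_cnt);
}
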
12 changes: 5 additions & 7 deletions drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -545,8 +545,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
}
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_PAGE_POOL, rq->page_pool);
if (err)
if (err) {
page_pool_free(rq->page_pool);
goto err_free;
}

for (i = 0; i < wq_sz; i++) {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -611,8 +613,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
xdp_rxq_info_unreg(&rq->xdp_rxq);
if (rq->page_pool)
page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);

return err;
@@ -625,10 +625,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);

xdp_rxq_info_unreg(&rq->xdp_rxq);
if (rq->page_pool)
page_pool_destroy(rq->page_pool);

switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
@@ -645,6 +641,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)

mlx5e_page_release(rq, dma_info, false);
}

xdp_rxq_info_unreg(&rq->xdp_rxq);
mlx5_wq_destroy(&rq->wq_ctrl);
}

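The net effect of the mlx5e_free_rq() hunks above, as a hedged sketch (the helper below is hypothetical and stands in for the mlx5e_page_release() loop shown in the diff): all outstanding RX pages are handed back before the rxq info is unregistered, and the driver no longer calls page_pool_destroy() itself, because the page_pool is now torn down via the XDP memory-model unregister path, the only place that can wait for in-flight pages.

static void example_free_rq(struct mlx5e_rq *rq)
{
	/* 1. Hand every page the driver still holds back to the pool. */
	example_return_all_rx_pages(rq);	/* hypothetical helper */

	/* 2. Unregister the rxq info: this disconnects the
	 *    MEM_TYPE_PAGE_POOL memory model, and the page_pool itself is
	 *    freed from there once in-flight pages have drained.
	 */
	xdp_rxq_info_unreg(&rq->xdp_rxq);

	mlx5_wq_destroy(&rq->wq_ctrl);
}
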
3 changes: 2 additions & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -248,7 +248,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
PAGE_SIZE, rq->buff.map_dir);
if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
put_page(dma_info->page);
page_pool_recycle_direct(rq->page_pool, dma_info->page);
dma_info->page = NULL;
return -ENOMEM;
}
@@ -272,6 +272,7 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
page_pool_recycle_direct(rq->page_pool, dma_info->page);
} else {
mlx5e_page_dma_unmap(rq, dma_info);
page_pool_release_page(rq->page_pool, dma_info->page);
put_page(dma_info->page);
}
}
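
Both hunks above follow the same rule: once a page has been handed out by the page_pool, every exit path must go back through the pool, otherwise the new hold/release accounting can never balance and shutdown would stall. A hedged sketch of a generic RX allocation error path under that rule (the function is illustrative, not the mlx5 code):

static struct page *example_rx_alloc(struct page_pool *pool,
				     struct device *dev, dma_addr_t *addr)
{
	struct page *page = page_pool_dev_alloc_pages(pool);

	if (unlikely(!page))
		return NULL;

	*addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(dev, *addr))) {
		/* Return the page to the pool, not to the page allocator:
		 * a bare put_page() here would leave a "hold" unaccounted
		 * for forever.
		 */
		page_pool_recycle_direct(pool, page);
		return NULL;
	}

	return page;
}
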
1 change: 1 addition & 0 deletions drivers/net/veth.c
@@ -547,6 +547,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
goto err;
}

xdp_release_frame(frame);
xdp_scrub_frame(frame);
skb->protocol = eth_type_trans(skb, rq->dev);
err:
69 changes: 60 additions & 9 deletions include/net/page_pool.h
@@ -16,14 +16,16 @@
* page_pool_alloc_pages() call. Drivers should likely use
* page_pool_dev_alloc_pages() replacing dev_alloc_pages().
*
* If page_pool handles DMA mapping (use page->private), then API user
* is responsible for invoking page_pool_put_page() once. In-case of
* elevated refcnt, the DMA state is released, assuming other users of
* the page will eventually call put_page().
* The API keeps track of in-flight pages, in order to let API users know
* when it is safe to deallocate the page_pool object. Thus, API users
* must make sure to call page_pool_release_page() when a page is
* "leaving" the page_pool, or call page_pool_put_page() where
* appropriate, in order to maintain correct accounting.
*
* If no DMA mapping is done, then it can act as shim-layer that
* fall-through to alloc_page. As no state is kept on the page, the
* regular put_page() call is sufficient.
* API user must only call page_pool_put_page() once on a page, as it
* will either recycle the page, or in case of elevated refcnt, it
* will release the DMA mapping and in-flight state accounting. We
* hope to lift this requirement in the future.
*/
#ifndef _NET_PAGE_POOL_H
#define _NET_PAGE_POOL_H
@@ -66,9 +68,10 @@ struct page_pool_params {
};

struct page_pool {
struct rcu_head rcu;
struct page_pool_params p;

u32 pages_state_hold_cnt;

/*
* Data structure for allocation side
*
@@ -96,6 +99,8 @@
* TODO: Implement bulk return pages into this structure.
*/
struct ptr_ring ring;

atomic_t pages_state_release_cnt;
};

struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
@@ -109,7 +114,16 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)

struct page_pool *page_pool_create(const struct page_pool_params *params);

void page_pool_destroy(struct page_pool *pool);
void __page_pool_free(struct page_pool *pool);
static inline void page_pool_free(struct page_pool *pool)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
__page_pool_free(pool);
#endif
}

/* Never call this directly, use helpers below */
void __page_pool_put_page(struct page_pool *pool,
@@ -132,6 +146,43 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
__page_pool_put_page(pool, page, true);
}

/* API user MUST have disconnected alloc-side (not allowed to call
* page_pool_alloc_pages()) before calling this. The free-side can
* still run concurrently, to handle in-flight packet-pages.
*
* A request to shutdown can fail (with false) if there are still
* in-flight packet-pages.
*/
bool __page_pool_request_shutdown(struct page_pool *pool);
static inline bool page_pool_request_shutdown(struct page_pool *pool)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
return __page_pool_request_shutdown(pool);
#endif
}

/* Disconnects a page from a page_pool. API users may need this in
 * order to use the page as a regular page again (one that will
 * eventually be returned to the normal page allocator via put_page()).
 */
void page_pool_unmap_page(struct page_pool *pool, struct page *page);
static inline void page_pool_release_page(struct page_pool *pool,
struct page *page)
{
#ifdef CONFIG_PAGE_POOL
page_pool_unmap_page(pool, page);
#endif
}

static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
return page->dma_addr;
}

static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
	return true;
#else
	return false;
#endif
}
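
Taken together, the page_pool.h changes above amount to the following usage contract. Below is a condensed, hedged sketch (the function name is made up, the page_pool_params field values are illustrative, and in a real driver the shutdown side is normally driven through the XDP memory-model unregister path rather than open-coded like this):

static int example_pool_lifecycle(struct device *dev, bool give_to_stack)
{
	struct page_pool_params pp_params = {
		.order		= 0,			/* one packet per page, as required */
		.flags		= PP_FLAG_DMA_MAP,	/* let the pool own the DMA mapping */
		.pool_size	= 256,
		.nid		= NUMA_NO_NODE,
		.dev		= dev,
		.dma_dir	= DMA_FROM_DEVICE,
	};
	struct page_pool *pool = page_pool_create(&pp_params);
	struct page *page;

	if (IS_ERR(pool))
		return PTR_ERR(pool);

	page = page_pool_dev_alloc_pages(pool);	/* accounted as a "hold" */
	if (!page)
		goto shutdown;

	/* ... program page_pool_get_dma_addr(page) into the NIC, receive ... */

	if (give_to_stack) {
		/* The page "leaves" the pool, e.g. it now backs an SKB:
		 * drop the DMA mapping and in-flight accounting first,
		 * then it is an ordinary page.
		 */
		page_pool_release_page(pool, page);
		put_page(page);
	} else {
		/* Consumed locally: recycle, accounted as a "release". */
		page_pool_recycle_direct(pool, page);
	}

shutdown:
	/* Only after the alloc side is quiesced, and only once no pages
	 * are reported in flight, may the pool itself be freed. Real
	 * code retries from deferred work when this returns false.
	 */
	if (page_pool_request_shutdown(pool))
		page_pool_free(pool);
	return 0;
}
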
15 changes: 15 additions & 0 deletions include/net/xdp.h
@@ -129,6 +129,21 @@ void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);

/* When sending an xdp_frame into the network stack, there is no
 * return-point callback that could release e.g. DMA-mapping
 * resources held via page_pool. Thus, an explicit function is
 * provided to release frame resources.
 */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
static inline void xdp_release_frame(struct xdp_frame *xdpf)
{
struct xdp_mem_info *mem = &xdpf->mem;

/* Currently only page_pool needs this */
if (mem->type == MEM_TYPE_PAGE_POOL)
__xdp_release_frame(xdpf->data, mem);
}

int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
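
The veth hunk earlier in this commit is the first caller of this helper. A hedged sketch of the conversion point the comment above describes, with the skb construction hidden behind a hypothetical helper (this is not the literal veth code):

static struct sk_buff *example_frame_to_skb(struct net_device *dev,
					    struct xdp_frame *frame)
{
	struct sk_buff *skb = example_build_skb(frame);	/* hypothetical helper */

	if (!skb)
		return NULL;

	/* The frame is about to enter the network stack, which will never
	 * call back into its memory model: release the page_pool DMA
	 * mapping and in-flight accounting now.
	 */
	xdp_release_frame(frame);
	xdp_scrub_frame(frame);

	skb->protocol = eth_type_trans(skb, dev);
	return skb;
}
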
23 changes: 23 additions & 0 deletions include/net/xdp_priv.h
@@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_NET_XDP_PRIV_H__
#define __LINUX_NET_XDP_PRIV_H__

#include <linux/rhashtable.h>

/* Private to net/core/xdp.c, but used by trace/events/xdp.h */
struct xdp_mem_allocator {
struct xdp_mem_info mem;
union {
void *allocator;
struct page_pool *page_pool;
struct zero_copy_allocator *zc_alloc;
};
int disconnect_cnt;
unsigned long defer_start;
struct rhash_head node;
struct rcu_head rcu;
struct delayed_work defer_wq;
unsigned long defer_warn;
};

#endif /* __LINUX_NET_XDP_PRIV_H__ */
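
The disconnect_cnt and defer_* fields exist because a shutdown request can fail while redirected pages are still in flight, so the disconnect has to be retried later. A simplified, hypothetical sketch of that retry pattern (the real logic lives in net/core/xdp.c and differs in detail; the RCU callback and time constants here are made up):

static void example_mem_allocator_free_rcu(struct rcu_head *rcu);	/* hypothetical */

static void example_mem_disconnect_retry(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct xdp_mem_allocator *xa =
		container_of(dwork, struct xdp_mem_allocator, defer_wq);

	xa->disconnect_cnt++;

	if (page_pool_request_shutdown(xa->page_pool)) {
		/* All in-flight pages have been returned: free the entry
		 * after an RCU grace period.
		 */
		call_rcu(&xa->rcu, example_mem_allocator_free_rcu);
		return;
	}

	if (time_after_eq(jiffies, xa->defer_warn)) {
		pr_warn("%s: pages still in flight after %d attempts\n",
			__func__, xa->disconnect_cnt);
		xa->defer_warn = jiffies + 60 * HZ;	/* made-up interval */
	}

	/* Pages still in flight: try again shortly. */
	schedule_delayed_work(&xa->defer_wq, msecs_to_jiffies(1000));
}
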
87 changes: 87 additions & 0 deletions include/trace/events/page_pool.h
@@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM page_pool

#if !defined(_TRACE_PAGE_POOL_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PAGE_POOL_H

#include <linux/types.h>
#include <linux/tracepoint.h>

#include <net/page_pool.h>

TRACE_EVENT(page_pool_inflight,

TP_PROTO(const struct page_pool *pool,
s32 inflight, u32 hold, u32 release),

TP_ARGS(pool, inflight, hold, release),

TP_STRUCT__entry(
__field(const struct page_pool *, pool)
__field(s32, inflight)
__field(u32, hold)
__field(u32, release)
),

TP_fast_assign(
__entry->pool = pool;
__entry->inflight = inflight;
__entry->hold = hold;
__entry->release = release;
),

TP_printk("page_pool=%p inflight=%d hold=%u release=%u",
__entry->pool, __entry->inflight, __entry->hold, __entry->release)
);

TRACE_EVENT(page_pool_state_release,

TP_PROTO(const struct page_pool *pool,
const struct page *page, u32 release),

TP_ARGS(pool, page, release),

TP_STRUCT__entry(
__field(const struct page_pool *, pool)
__field(const struct page *, page)
__field(u32, release)
),

TP_fast_assign(
__entry->pool = pool;
__entry->page = page;
__entry->release = release;
),

TP_printk("page_pool=%p page=%p release=%u",
__entry->pool, __entry->page, __entry->release)
);

TRACE_EVENT(page_pool_state_hold,

TP_PROTO(const struct page_pool *pool,
const struct page *page, u32 hold),

TP_ARGS(pool, page, hold),

TP_STRUCT__entry(
__field(const struct page_pool *, pool)
__field(const struct page *, page)
__field(u32, hold)
),

TP_fast_assign(
__entry->pool = pool;
__entry->page = page;
__entry->hold = hold;
),

TP_printk("page_pool=%p page=%p hold=%u",
__entry->pool, __entry->page, __entry->hold)
);

#endif /* _TRACE_PAGE_POOL_H */

/* This part must be outside protection */
#include <trace/define_trace.h>