Skip to content

Commit

Permalink
net/mlx5e: Fix usage of DMA sync API
Browse files Browse the repository at this point in the history
DMA sync functions should use the same direction that was used by DMA
mapping. Use DMA_BIDIRECTIONAL for XDP_TX from regular RQ, which reuses
the same mapping that was used for RX, and DMA_TO_DEVICE for XDP_TX from
XSK RQ and XDP_REDIRECT, which establish a new mapping in this
direction. On the RX side, use the same direction that was used when
setting up the mapping (DMA_BIDIRECTIONAL for XDP, DMA_FROM_DEVICE
otherwise).

Also don't skip sync for device when establishing a DMA_FROM_DEVICE
mapping for RX, as some architectures (ARM) may require invalidating
caches before the device can use the mapping. It doesn't break the
bugfix made in
commit 0b7cfa4 ("net/mlx5e: Fix page DMA map/unmap attributes"),
since the bug happened on unmap.

Fixes: 0b7cfa4 ("net/mlx5e: Fix page DMA map/unmap attributes")
Fixes: b5503b9 ("net/mlx5e: XDP TX forwarding support")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
  • Loading branch information
Maxim Mikityanskiy authored and Saeed Mahameed committed Nov 9, 2022
1 parent f9c955b commit 8d4b475
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 15 deletions.
4 changes: 2 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
xdpi.page.rq = rq;

dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_TO_DEVICE);
dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);

if (unlikely(xdp_frame_has_frags(xdpf))) {
sinfo = xdp_get_shared_info_from_frame(xdpf);
Expand All @@ -131,7 +131,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
skb_frag_off(frag);
len = skb_frag_size(frag);
dma_sync_single_for_device(sq->pdev, addr, len,
DMA_TO_DEVICE);
DMA_BIDIRECTIONAL);
}
}

Expand Down
27 changes: 14 additions & 13 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_uni

addr = page_pool_get_dma_addr(au->page);
/* Non-XSK always uses PAGE_SIZE. */
dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, DMA_FROM_DEVICE);
dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
return true;
}

Expand All @@ -282,8 +282,7 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_u
return -ENOMEM;

/* Non-XSK always uses PAGE_SIZE. */
addr = dma_map_page_attrs(rq->pdev, au->page, 0, PAGE_SIZE,
rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC);
addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
if (unlikely(dma_mapping_error(rq->pdev, addr))) {
page_pool_recycle_direct(rq->page_pool, au->page);
au->page = NULL;
Expand Down Expand Up @@ -427,22 +426,24 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
{
dma_addr_t addr = page_pool_get_dma_addr(au->page);

dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, DMA_FROM_DEVICE);
dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
rq->buff.map_dir);
page_ref_inc(au->page);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
au->page, frag_offset, len, truesize);
}

static inline void
mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
struct page *page, dma_addr_t addr,
int offset_from, int dma_offset, u32 headlen)
{
const void *from = page_address(page) + offset_from;
/* Aligning len to sizeof(long) optimizes memcpy performance */
unsigned int len = ALIGN(headlen, sizeof(long));

dma_sync_single_for_cpu(pdev, addr + dma_offset, len, DMA_FROM_DEVICE);
dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len,
rq->buff.map_dir);
skb_copy_to_linear_data(skb, from, len);
}

Expand Down Expand Up @@ -1538,7 +1539,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,

addr = page_pool_get_dma_addr(au->page);
dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
frag_size, rq->buff.map_dir);
net_prefetch(data);

prog = rcu_dereference(rq->xdp_prog);
Expand Down Expand Up @@ -1587,7 +1588,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi

addr = page_pool_get_dma_addr(au->page);
dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
rq->buff.frame0_sz, DMA_FROM_DEVICE);
rq->buff.frame0_sz, rq->buff.map_dir);
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(va + rx_headroom);

Expand All @@ -1608,7 +1609,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi

addr = page_pool_get_dma_addr(au->page);
dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
frag_consumed_bytes, DMA_FROM_DEVICE);
frag_consumed_bytes, rq->buff.map_dir);

if (!xdp_buff_has_frags(&xdp)) {
/* Init on the first fragment to avoid cold cache access
Expand Down Expand Up @@ -1905,7 +1906,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
/* copy header */
addr = page_pool_get_dma_addr(head_au->page);
mlx5e_copy_skb_header(rq->pdev, skb, head_au->page, addr,
mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
head_offset, head_offset, headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
Expand Down Expand Up @@ -1939,7 +1940,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,

addr = page_pool_get_dma_addr(au->page);
dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
frag_size, DMA_FROM_DEVICE);
frag_size, rq->buff.map_dir);
net_prefetch(data);

prog = rcu_dereference(rq->xdp_prog);
Expand Down Expand Up @@ -1987,7 +1988,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,

if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
/* build SKB around header */
dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE);
dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir);
prefetchw(hdr);
prefetch(data);
skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);
Expand All @@ -2009,7 +2010,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
}

prefetchw(skb->data);
mlx5e_copy_skb_header(rq->pdev, skb, head->page, head->addr,
mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
head_offset + rx_headroom,
rx_headroom, head_size);
/* skb linear part was allocated with headlen and aligned to long */
Expand Down

0 comments on commit 8d4b475

Please sign in to comment.