Skip to content

Commit

Permalink
hv_netvsc: Add XDP support
Browse files Browse the repository at this point in the history
This patch adds native-mode XDP support to the hv_netvsc driver, and
transparently sets the XDP program on the associated VF NIC as well.

Setting / unsetting an XDP program on the synthetic NIC (netvsc) propagates
to the VF NIC automatically. Setting / unsetting an XDP program on the VF NIC
directly is not recommended: it is not propagated to the synthetic NIC, and
it may be overwritten by a later setting on the synthetic NIC.

The Azure/Hyper-V synthetic NIC receive buffer doesn't provide headroom
for XDP. We considered reusing the RNDIS header space, but it's too
small, so we decided to copy the packets to a page buffer for XDP. Most
of our VMs on Azure have Accelerated Networking (SR-IOV) enabled, so
most of the packets run on the VF NIC, and the synthetic NIC is considered
a fallback data path. The data copy on netvsc therefore won't impact
performance significantly.

XDP program cannot run with LRO (RSC) enabled, so you need to disable LRO
before running XDP:
        ethtool -K eth0 lro off

XDP actions not yet supported:
        XDP_REDIRECT

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Haiyang Zhang authored and David S. Miller committed Jan 25, 2020
1 parent 6ec8b6c commit 351e158
Show file tree
Hide file tree
Showing 6 changed files with 409 additions and 39 deletions.
2 changes: 1 addition & 1 deletion drivers/net/hyperv/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o

hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o
hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o netvsc_bpf.o
21 changes: 20 additions & 1 deletion drivers/net/hyperv/hyperv_net.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ struct netvsc_device_info {
u32 send_section_size;
u32 recv_section_size;

struct bpf_prog *bprog;

u8 rss_key[NETVSC_HASH_KEYLEN];
};

Expand Down Expand Up @@ -189,7 +191,8 @@ int netvsc_send(struct net_device *net,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
struct hv_page_buffer *page_buffer,
struct sk_buff *skb);
struct sk_buff *skb,
bool xdp_tx);
void netvsc_linkstatus_callback(struct net_device *net,
struct rndis_message *resp);
int netvsc_recv_callback(struct net_device *net,
Expand All @@ -198,6 +201,16 @@ int netvsc_recv_callback(struct net_device *net,
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);

u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
struct xdp_buff *xdp);
unsigned int netvsc_xdp_fraglen(unsigned int len);
struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev);
int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct netlink_ext_ack *extack,
struct netvsc_device *nvdev);
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog);
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf);

int rndis_set_subchannel(struct net_device *ndev,
struct netvsc_device *nvdev,
struct netvsc_device_info *dev_info);
Expand Down Expand Up @@ -832,6 +845,8 @@ struct nvsp_message {
#define RNDIS_MAX_PKT_DEFAULT 8
#define RNDIS_PKT_ALIGN_DEFAULT 8

#define NETVSC_XDP_HDRM 256

struct multi_send_data {
struct sk_buff *skb; /* skb containing the pkt */
struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
Expand Down Expand Up @@ -867,6 +882,7 @@ struct netvsc_stats {
u64 bytes;
u64 broadcast;
u64 multicast;
u64 xdp_drop;
struct u64_stats_sync syncp;
};

Expand Down Expand Up @@ -972,6 +988,9 @@ struct netvsc_channel {
atomic_t queue_sends;
struct nvsc_rsc rsc;

struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;

struct netvsc_stats tx_stats;
struct netvsc_stats rx_stats;
};
Expand Down
31 changes: 26 additions & 5 deletions drivers/net/hyperv/netvsc.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,10 @@ static void free_netvsc_device(struct rcu_head *head)
vfree(nvdev->send_buf);
kfree(nvdev->send_section_map);

for (i = 0; i < VRSS_CHANNEL_MAX; i++)
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
vfree(nvdev->chan_table[i].mrc.slots);
}

kfree(nvdev);
}
Expand Down Expand Up @@ -900,7 +902,8 @@ int netvsc_send(struct net_device *ndev,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
struct hv_page_buffer *pb,
struct sk_buff *skb)
struct sk_buff *skb,
bool xdp_tx)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct netvsc_device *net_device
Expand All @@ -923,10 +926,11 @@ int netvsc_send(struct net_device *ndev,
packet->send_buf_index = NETVSC_INVALID_INDEX;
packet->cp_partial = false;

/* Send control message directly without accessing msd (Multi-Send
* Data) field which may be changed during data packet processing.
/* Send a control message or XDP packet directly without accessing
* msd (Multi-Send Data) field which may be changed during data packet
* processing.
*/
if (!skb)
if (!skb || xdp_tx)
return netvsc_send_pkt(device, packet, net_device, pb, skb);

/* batch packets in send buffer if possible */
Expand Down Expand Up @@ -1392,6 +1396,21 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
nvchan->net_device = net_device;
u64_stats_init(&nvchan->tx_stats.syncp);
u64_stats_init(&nvchan->rx_stats.syncp);

ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i);

if (ret) {
netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
goto cleanup2;
}

ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
MEM_TYPE_PAGE_SHARED, NULL);

if (ret) {
netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
goto cleanup2;
}
}

/* Enable NAPI handler before init callbacks */
Expand Down Expand Up @@ -1437,6 +1456,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,

cleanup:
netif_napi_del(&net_device->chan_table[0].napi);

cleanup2:
free_netvsc_device(&net_device->rcu);

return ERR_PTR(ret);
Expand Down
209 changes: 209 additions & 0 deletions drivers/net/hyperv/netvsc_bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019, Microsoft Corporation.
*
* Author:
* Haiyang Zhang <haiyangz@microsoft.com>
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/kernel.h>
#include <net/xdp.h>

#include <linux/mutex.h>
#include <linux/rtnetlink.h>

#include "hyperv_net.h"

u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
struct xdp_buff *xdp)
{
void *data = nvchan->rsc.data[0];
u32 len = nvchan->rsc.len[0];
struct page *page = NULL;
struct bpf_prog *prog;
u32 act = XDP_PASS;

xdp->data_hard_start = NULL;

rcu_read_lock();
prog = rcu_dereference(nvchan->bpf_prog);

if (!prog)
goto out;

/* allocate page buffer for data */
page = alloc_page(GFP_ATOMIC);
if (!page) {
act = XDP_DROP;
goto out;
}

xdp->data_hard_start = page_address(page);
xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM;
xdp_set_data_meta_invalid(xdp);
xdp->data_end = xdp->data + len;
xdp->rxq = &nvchan->xdp_rxq;
xdp->handle = 0;

memcpy(xdp->data, data, len);

act = bpf_prog_run_xdp(prog, xdp);

switch (act) {
case XDP_PASS:
case XDP_TX:
case XDP_DROP:
break;

case XDP_ABORTED:
trace_xdp_exception(ndev, prog, act);
break;

default:
bpf_warn_invalid_xdp_action(act);
}

out:
rcu_read_unlock();

if (page && act != XDP_PASS && act != XDP_TX) {
__free_page(page);
xdp->data_hard_start = NULL;
}

return act;
}

/* Buffer space needed for @len bytes of packet data followed by a
 * struct skb_shared_info, each rounded up with SKB_DATA_ALIGN().
 */
unsigned int netvsc_xdp_fraglen(unsigned int len)
{
	unsigned int shinfo_sz = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	unsigned int data_sz = SKB_DATA_ALIGN(len);

	return data_sz + shinfo_sz;
}

struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev)
{
return rtnl_dereference(nvdev->chan_table[0].bpf_prog);
}

/**
 * netvsc_xdp_set - install or remove the XDP program on all channels
 * @dev:    synthetic net device (MTU and feature flags are checked)
 * @prog:   program to install, or NULL to remove the current one
 * @extack: netlink extended ack used to report why a program was rejected
 * @nvdev:  netvsc device whose first num_chn channels are updated
 *
 * A program is rejected with -EOPNOTSUPP when a frame of mtu + ETH_HLEN
 * bytes, plus NETVSC_XDP_HDRM headroom and the shared-info tail, does not
 * fit in one page, or when LRO is enabled on @dev.
 *
 * One reference per channel is taken on @prog, and one reference per
 * channel is dropped on the program that was previously installed.
 * NOTE(review): these references are taken in addition to whatever
 * reference the caller already holds on @prog - confirm that every caller
 * accounts for its own reference.
 *
 * Return: 0 on success (including the no-op case where no program is
 * installed and none is requested), -EOPNOTSUPP if @prog is rejected.
 */
int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
		   struct netlink_ext_ack *extack,
		   struct netvsc_device *nvdev)
{
	struct bpf_prog *old_prog;
	unsigned int buf_max;	/* unsigned: printed with %u, built from unsigned sizes */
	int i;

	old_prog = netvsc_xdp_get(nvdev);

	if (!old_prog && !prog)
		return 0;

	buf_max = NETVSC_XDP_HDRM + netvsc_xdp_fraglen(dev->mtu + ETH_HLEN);
	if (prog && buf_max > PAGE_SIZE) {
		netdev_err(dev, "XDP: mtu:%u too large, buf_max:%u\n",
			   dev->mtu, buf_max);
		NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");

		return -EOPNOTSUPP;
	}

	if (prog && (dev->features & NETIF_F_LRO)) {
		netdev_err(dev, "XDP: not support LRO\n");
		NL_SET_ERR_MSG_MOD(extack, "XDP: not support LRO");

		return -EOPNOTSUPP;
	}

	/* One reference for each channel that will point at the program. */
	if (prog)
		bpf_prog_add(prog, nvdev->num_chn);

	for (i = 0; i < nvdev->num_chn; i++)
		rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog);

	/* Release the per-channel references held on the replaced program. */
	if (old_prog)
		for (i = 0; i < nvdev->num_chn; i++)
			bpf_prog_put(old_prog);

	return 0;
}

/**
 * netvsc_vf_setxdp - set or clear the XDP program on the VF net device
 * @vf_netdev: VF net device, may be NULL
 * @prog:      program to install, or NULL to remove
 *
 * Issues XDP_SETUP_PROG through the VF's ndo_bpf. A driver that accepts
 * XDP_SETUP_PROG takes ownership of one program reference, so a dedicated
 * reference is taken here for the VF and dropped again if the VF rejects
 * the program. The reference held by the caller is never consumed.
 *
 * Must be called with RTNL held.
 *
 * Return: 0 on success, or when there is no VF or the VF has no ndo_bpf;
 * otherwise the error returned by the VF driver.
 */
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
{
	struct netdev_bpf xdp;
	bpf_op_t ndo_bpf;
	int ret;

	ASSERT_RTNL();

	if (!vf_netdev)
		return 0;

	ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
	if (!ndo_bpf)
		return 0;

	memset(&xdp, 0, sizeof(xdp));

	/* Reference that the VF driver will own once it accepts the program. */
	if (prog)
		bpf_prog_inc(prog);

	xdp.command = XDP_SETUP_PROG;
	xdp.prog = prog;

	ret = ndo_bpf(vf_netdev, &xdp);

	/* The VF did not take the program: give the reference back. */
	if (ret && prog)
		bpf_prog_put(prog);

	return ret;
}

/* Return the id of the program installed on @nvdev, or 0 if there is none. */
static u32 netvsc_xdp_query(struct netvsc_device *nvdev)
{
	struct bpf_prog *prog = netvsc_xdp_get(nvdev);

	if (prog)
		return prog->aux->id;

	return 0;
}

/**
 * netvsc_bpf - ndo_bpf handler of the synthetic NIC
 * @dev: synthetic net device
 * @bpf: command descriptor (XDP_SETUP_PROG or XDP_QUERY_PROG)
 *
 * XDP_SETUP_PROG installs the program on the synthetic channels and then on
 * the VF. If the VF rejects it, the synthetic side is cleared again and the
 * VF error is returned.
 *
 * Reference accounting for XDP_SETUP_PROG: netvsc_xdp_set() takes one
 * reference per channel and netvsc_vf_setxdp() takes one for the VF, so
 * the reference passed in by the caller has no owner left once both
 * succeed and is dropped here. On failure the caller keeps its reference.
 *
 * Return: 0 on success, -ENODEV if the netvsc device is missing or being
 * destroyed (queries still succeed and report id 0), -EINVAL for unknown
 * commands, or the error from the install step.
 */
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
	struct net_device_context *ndevctx = netdev_priv(dev);
	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
	struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
	struct netlink_ext_ack *extack = bpf->extack;
	int ret;

	if (!nvdev || nvdev->destroy) {
		if (bpf->command == XDP_QUERY_PROG) {
			bpf->prog_id = 0;
			return 0; /* Query must always succeed */
		}

		return -ENODEV;
	}

	switch (bpf->command) {
	case XDP_SETUP_PROG:
		ret = netvsc_xdp_set(dev, bpf->prog, extack, nvdev);
		if (ret)
			return ret;

		ret = netvsc_vf_setxdp(vf_netdev, bpf->prog);
		if (ret) {
			netdev_err(dev, "vf_setxdp failed:%d\n", ret);
			NL_SET_ERR_MSG_MOD(extack, "vf_setxdp failed");

			/* Undo the synthetic-side install. */
			netvsc_xdp_set(dev, NULL, extack, nvdev);

			return ret;
		}

		/* Channels and VF hold their own references; the one handed
		 * in with the command would otherwise never be released.
		 */
		if (bpf->prog)
			bpf_prog_put(bpf->prog);

		return 0;

	case XDP_QUERY_PROG:
		bpf->prog_id = netvsc_xdp_query(nvdev);
		return 0;

	default:
		return -EINVAL;
	}
}
Loading

0 comments on commit 351e158

Please sign in to comment.